@incollection{benjamin_memory_2007,
	author = {Benjamin, Aaron S.},
	title = {Memory is More than just Remembering: Strategic Control of Encoding, Accessing Memory, and Making Decisions},
	shorttitle = {Memory is More than just Remembering},
	booktitle = {Psychology of Learning and Motivation},
	volume = {48},
	pages = {175--223},
	publisher = {Elsevier},
	date = {2007},
	isbn = {978-0-12-373607-9},
	doi = {10.1016/S0079-7421(07)48005-7},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0079742107480057},
	urldate = {2024-02-10},
	langid = {english},
}

@article{nussenbaum_memorys_2020,
	title = {Memory’s reflection of learned information value increases across development},
	volume = {149},
	issn = {1939-2222, 0096-3445},
	url = {https://doi.apa.org/doi/10.1037/xge0000753},
	doi = {10.1037/xge0000753},
	pages = {1919--1934},
	number = {10},
	journaltitle = {Journal of Experimental Psychology: General},
	shortjournal = {Journal of Experimental Psychology: General},
	author = {Nussenbaum, Kate and Prentis, Euan and Hartley, Catherine A.},
	urldate = {2024-02-10},
	date = {2020-10},
	langid = {english},
}

@article{schacter_future_2012,
	author = {Schacter, Daniel L. and Addis, Donna Rose and Hassabis, Demis and Martin, Victoria C. and Spreng, R. Nathan and Szpunar, Karl K.},
	title = {The Future of Memory: Remembering, Imagining, and the Brain},
	shorttitle = {The Future of Memory},
	journaltitle = {Neuron},
	shortjournal = {Neuron},
	volume = {76},
	number = {4},
	pages = {677--694},
	date = {2012-11},
	issn = {08966273},
	doi = {10.1016/j.neuron.2012.11.001},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0896627312009919},
	urldate = {2024-02-10},
	langid = {english},
}

@article{camina_neuroanatomical_2017,
	author = {Camina, Eduardo and Güell, Francisco},
	title = {The Neuroanatomical, Neurophysiological and Psychological Basis of Memory: Current Models and Their Origins},
	shorttitle = {The Neuroanatomical, Neurophysiological and Psychological Basis of Memory},
	journaltitle = {Frontiers in Pharmacology},
	shortjournal = {Front Pharmacol},
	volume = {8},
	pages = {438},
	date = {2017},
	issn = {1663-9812},
	doi = {10.3389/fphar.2017.00438},
	pmid = {28713278},
	pmcid = {PMC5491610},
	keywords = {explicit memory, implicit memory, long-term memory, sensory memory, short-term memory},
	abstract = {This review aims to classify and clarify, from a neuroanatomical, neurophysiological, and psychological perspective, different memory models that are currently widespread in the literature as well as to describe their origins. We believe it is important to consider previous developments without which one cannot adequately understand the kinds of models that are now current in the scientific literature. This article intends to provide a comprehensive and rigorous overview for understanding and ordering the latest scientific advances related to this subject. The main forms of memory presented include sensory memory, short-term memory, and long-term memory. Information from the world around us is first stored by sensory memory, thus enabling the storage and future use of such information. Short-term memory (or memory) refers to information processed in a short period of time. Long-term memory allows us to store information for long periods of time, including information that can be retrieved consciously (explicit memory) or unconsciously (implicit memory).},
}

@article{nairne_adaptive_2016,
	author = {Nairne, James S. and Pandeirada, Josefa N. S.},
	title = {Adaptive Memory: The Evolutionary Significance of Survival Processing},
	shorttitle = {Adaptive Memory},
	journaltitle = {Perspectives on Psychological Science},
	shortjournal = {Perspect Psychol Sci},
	volume = {11},
	number = {4},
	pages = {496--511},
	date = {2016-07},
	issn = {1745-6916, 1745-6924},
	doi = {10.1177/1745691616635613},
	url = {http://journals.sagepub.com/doi/10.1177/1745691616635613},
	urldate = {2024-02-10},
	langid = {english},
	abstract = {A few seconds of survival processing, during which people assess the relevance of information to a survival situation, produces particularly good retention. One interpretation of this benefit is that our memory systems are optimized to process and retain fitness-relevant information. Such a “tuning” may exist, in part, because our memory systems were shaped by natural selection, using a fitness-based criterion. However, recent research suggests that traditional mnemonic processes, such as elaborative processing, may play an important role in producing the empirical benefit. Boundary conditions have been demonstrated as well, leading some to dismiss evolutionary interpretations of the phenomenon. In this article, we discuss the current state of the evolutionary account and provide a general framework for evaluating evolutionary and purportedly nonevolutionary interpretations of mnemonic phenomena. We suggest that survival processing effects are best viewed within the context of a general survival optimization system, designed by nature to help organisms deal with survival challenges. An important component of survival optimization is the ability to simulate activities that help to prevent or escape from future threats which, in turn, depends in an important way on accurate retrospective remembering of survival-relevant information.},
}

@incollection{atkinson_human_1968,
	title = {Human Memory: A Proposed System and its Control Processes},
	volume = {2},
	isbn = {978-0-12-543302-0},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0079742108604223},
	shorttitle = {Human Memory},
	pages = {89--195},
	booktitle = {Psychology of Learning and Motivation},
	publisher = {Elsevier},
	author = {Atkinson, R. C. and Shiffrin, R. M.},
	urldate = {2024-02-23},
	date = {1968},
	langid = {english},
	doi = {10.1016/S0079-7421(08)60422-3},
}

@article{baddeley_episodic_2000,
	author = {Baddeley, Alan},
	title = {The episodic buffer: a new component of working memory?},
	shorttitle = {The episodic buffer},
	journaltitle = {Trends in Cognitive Sciences},
	shortjournal = {Trends in Cognitive Sciences},
	volume = {4},
	number = {11},
	pages = {417--423},
	date = {2000-11},
	issn = {13646613},
	doi = {10.1016/S1364-6613(00)01538-2},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1364661300015382},
	urldate = {2024-02-23},
	langid = {english},
}

@article{smith_multiple_2008,
	author = {Smith, Edward E. and Grossman, Murray},
	title = {Multiple systems of category learning},
	journaltitle = {Neuroscience and Biobehavioral Reviews},
	shortjournal = {Neurosci Biobehav Rev},
	volume = {32},
	number = {2},
	pages = {249--264},
	date = {2008},
	issn = {0149-7634},
	doi = {10.1016/j.neubiorev.2007.07.009},
	pmid = {17904637},
	pmcid = {PMC2735458},
	keywords = {Humans, Cerebral Cortex, Memory, Brain Mapping, Classification, Concept Formation, Discrimination Learning},
	abstract = {We review neuropsychological and neuroimaging evidence for the existence of three qualitatively different categorization systems. These categorization systems are themselves based on three distinct memory systems: working memory ({WM}), explicit long-term memory (explicit {LTM}), and implicit long-term memory (implicit {LTM}). We first contrast categorization based on {WM} with that based on explicit {LTM}, where the former typically involves applying rules to a test item and the latter involves determining the similarity between stored exemplars or prototypes and a test item. Neuroimaging studies show differences between brain activity in normal participants as a function of whether they are instructed to categorize novel test items by rule or by similarity to known category members. Rule instructions typically lead to more activation in frontal or parietal areas, associated with {WM} and selective attention, whereas similarity instructions may activate parietal areas associated with the integration of perceptual features. Studies with neurological patients in the same paradigms provide converging evidence, e.g., patients with Alzheimer's disease, who have damage in prefrontal regions, are more impaired with rule than similarity instructions. Our second contrast is between categorization based on explicit {LTM} with that based on implicit {LTM}. Neuropsychological studies with patients with medial-temporal lobe damage show that patients are impaired on tasks requiring explicit {LTM}, but perform relatively normally on an implicit categorization task. Neuroimaging studies provide converging evidence: whereas explicit categorization is mediated by activation in numerous frontal and parietal areas, implicit categorization is mediated by a deactivation in posterior cortex.},
}

@article{renoult_knowing_2019,
	author = {Renoult, Louis and Irish, Muireann and Moscovitch, Morris and Rugg, Michael D.},
	title = {From Knowing to Remembering: The Semantic-Episodic Distinction},
	shorttitle = {From Knowing to Remembering},
	journaltitle = {Trends in Cognitive Sciences},
	shortjournal = {Trends Cogn Sci},
	volume = {23},
	number = {12},
	pages = {1041--1057},
	date = {2019-12},
	issn = {1879-307X},
	doi = {10.1016/j.tics.2019.09.008},
	pmid = {31672430},
	keywords = {Humans, Animals, Brain, Knowledge, Semantics, Models, Theoretical, Memory, anterior temporal lobe, conceptual knowledge, episodic memory, hippocampus, reinstatement, semantic memory},
	abstract = {The distinction between episodic and semantic memory was first proposed in 1972 by Endel Tulving and is still of central importance in cognitive neuroscience. However, data obtained over the past 30 years or so support the idea that the frontiers between perception and knowledge and between episodic and semantic memory are not as clear cut as previously thought, prompting a rethink of the episodic-semantic distinction. Here, we review recent research on episodic and semantic memory, highlighting similarities between the two systems. Taken together, current behavioral, neuropsychological, and neuroimaging data are compatible with the idea that episodic and semantic memory are inextricably intertwined, yet retain a measure of distinctiveness, despite the fact that their neural correlates demonstrate considerable overlap.},
}

@article{kazanas_survival_2015,
	title = {The Survival Advantage: Underlying Mechanisms and Extant Limitations},
	volume = {13},
	issn = {1474-7049},
	url = {http://journals.sagepub.com/doi/10.1177/147470491501300204},
	doi = {10.1177/147470491501300204},
	shorttitle = {The Survival Advantage},
	abstract = {Recently, researchers have begun to investigate the function of memory in our evolutionary history. According to Nairne and colleagues (e.g., Nairne, Pandeirada, and Thompson, 2008; Nairne, Thompson, and Pandeirada, 2007), the best mnemonic strategy for learning lists of unrelated words may be one that addresses the same problems that our Pleistocene ancestors faced: fitness-relevant problems including securing food and water, as well as protecting themselves from predators. Survival processing has been shown to promote better recall and recognition memory than many well-known mnemonic strategies (e.g., pleasantness ratings, imagery, generation, etc.). However, the survival advantage does not extend to all types of stimuli and tasks. The current review presents research that has replicated Nairne et al.'s (2007) original findings, in addition to the research designs that fail to replicate the survival advantage. In other words, there are specific manipulations in which survival processing does not appear to benefit memory any more than other strategies. Potential mechanisms for the survival advantage are described, with an emphasis on those that are the most plausible. These proximate mechanisms outline the memory processes that may contribute to the advantage, although the ultimate mechanism may be the congruity between the survival scenario and Pleistocene problem-solving.},
	pages = {360--396},
	number = {2},
	journaltitle = {Evolutionary Psychology},
	shortjournal = {Evol Psychol},
	author = {Kazanas, Stephanie A. and Altarriba, Jeanette},
	urldate = {2024-02-23},
	date = {2015-04-01},
	langid = {english},
}

@article{blumenfeld_lateral_2019,
	author = {Blumenfeld, Robert S. and Ranganath, Charan},
	title = {The lateral prefrontal cortex and human long-term memory},
	journaltitle = {Handbook of Clinical Neurology},
	shortjournal = {Handb Clin Neurol},
	volume = {163},
	pages = {221--235},
	date = {2019},
	issn = {0072-9752},
	doi = {10.1016/B978-0-12-804281-6.00012-4},
	pmid = {31590732},
	keywords = {Humans, Electric Stimulation, Neuroimaging, Neuropsychological Tests, Memory, Brain Mapping, Control, Episodic, Executive, {fMRI}, Frontal, Functional, Functional Laterality, Human, Learning, Lesion, Long term, Magnetic resonance imaging, Memory, Long-Term, Monkey, Neuropsychology, Patient, Prefrontal, Prefrontal Cortex, Recognition, Semantic, Short term, Working},
	abstract = {Recent research has demonstrated that the lateral prefrontal cortex is extensively involved in human memory, including working memory processes that support retention of information across short delays, and episodic long-term memory encoding and retrieval processes. This chapter reviews results from neuroimaging studies of memory, from noninvasive brain stimulation studies of memory, and from studies of memory in patients with prefrontal lesions. The available evidence is consistent with the idea that different prefrontal regions implement cognitive or executive control processes that support working memory and episodic long-term memory encoding and retrieval.},
}

@article{simmons_anterior_2009,
	title = {The anterior temporal lobes and the functional architecture of semantic memory},
	volume = {15},
	issn = {1469-7661},
	doi = {10.1017/S1355617709990348},
	abstract = {Recently, three accounts have emerged on the role of the anterior temporal lobes ({ATLs}) in semantic memory. One account claims that the {ATLs} are domain-general semantic hubs, another claims that they underlie knowledge of unique entities specifically, and yet another account claims that they support social conceptual knowledge generally. Here, we review neuropsychological and neuroimaging studies that bear on these three accounts and offer suggestions for future research to elucidate the roles of the {ATLs} in semantic memory.},
	pages = {645--649},
	number = {5},
	journaltitle = {Journal of the International Neuropsychological Society},
	shortjournal = {J Int Neuropsychol Soc},
	author = {Simmons, W. Kyle and Martin, Alex},
	date = {2009-09},
	pmid = {19631024},
	pmcid = {PMC2791360},
	keywords = {Humans, Temporal Lobe, Neuropsychological Tests, Semantics, Magnetic Resonance Imaging, Memory, Positron-Emission Tomography},
}

@article{squire_medial_1991,
	author = {Squire, Larry R. and Zola-Morgan, Stuart},
	title = {The Medial Temporal Lobe Memory System},
	journaltitle = {Science},
	shortjournal = {Science},
	volume = {253},
	number = {5026},
	pages = {1380--1386},
	date = {1991-09-20},
	issn = {0036-8075, 1095-9203},
	doi = {10.1126/science.1896849},
	url = {https://www.science.org/doi/10.1126/science.1896849},
	urldate = {2024-02-23},
	langid = {english},
	abstract = {Studies of human amnesia and studies of an animal model of human amnesia in the monkey have identified the anatomical components of the brain system for memory in the medial temporal lobe and have illuminated its function. This neural system consists of the hippocampus and adjacent, anatomically related cortex, including entorhinal, perirhinal, and parahippocampal cortices. These structures, presumably by virtue of their widespread and reciprocal connections with neocortex, are essential for establishing long-term memory for facts and events (declarative memory). The medial temporal lobe memory system is needed to bind together the distributed storage sites in neocortex that represent a whole memory. However, the role of this system is only temporary. As time passes after learning, memory stored in neocortex gradually becomes independent of medial temporal lobe structures.},
}

@article{collins_spreading_activation_1975,
	author = {Collins, Allan M. and Loftus, Elizabeth F.},
	title = {A spreading-activation theory of semantic processing},
	journaltitle = {Psychological Review},
	shortjournal = {Psychological Review},
	volume = {82},
	number = {6},
	pages = {407--428},
	date = {1975-11},
	issn = {1939-1471, 0033-295X},
	doi = {10.1037/0033-295X.82.6.407},
	url = {https://doi.apa.org/doi/10.1037/0033-295X.82.6.407},
	urldate = {2024-02-10},
	langid = {english},
}

@article{casasanto_time_2008,
	author = {Casasanto, Daniel and Boroditsky, Lera},
	title = {Time in the mind: Using space to think about time},
	shorttitle = {Time in the mind},
	journaltitle = {Cognition},
	shortjournal = {Cognition},
	volume = {106},
	number = {2},
	pages = {579--593},
	date = {2008-02},
	issn = {00100277},
	doi = {10.1016/j.cognition.2007.03.004},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S001002770700087X},
	urldate = {2024-02-22},
	langid = {english},
}

@article{lehmann_semantic_1992,
	author = {Lehmann, Fritz},
	title = {Semantic networks},
	journaltitle = {Computers \& Mathematics with Applications},
	shortjournal = {Computers \& Mathematics with Applications},
	volume = {23},
	number = {2},
	pages = {1--50},
	date = {1992-01},
	issn = {08981221},
	doi = {10.1016/0898-1221(92)90135-5},
	url = {https://linkinghub.elsevier.com/retrieve/pii/0898122192901355},
	urldate = {2024-02-22},
	langid = {english},
}

@article{anderson_spreading_1983,
	author = {Anderson, John R.},
	title = {A spreading activation theory of memory},
	journaltitle = {Journal of Verbal Learning and Verbal Behavior},
	shortjournal = {Journal of Verbal Learning and Verbal Behavior},
	volume = {22},
	number = {3},
	pages = {261--295},
	date = {1983-06},
	issn = {00225371},
	doi = {10.1016/S0022-5371(83)90201-3},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0022537183902013},
	urldate = {2024-02-10},
	langid = {english},
}

@article{james_stability_2018,
	author = {James, Lois},
	title = {The Stability of Implicit Racial Bias in Police Officers},
	journaltitle = {Police Quarterly},
	shortjournal = {Police Quarterly},
	volume = {21},
	number = {1},
	pages = {30--52},
	date = {2018-03},
	issn = {1098-6111, 1552-745X},
	doi = {10.1177/1098611117732974},
	url = {http://journals.sagepub.com/doi/10.1177/1098611117732974},
	urldate = {2024-02-22},
	langid = {english},
	abstract = {Research on police officers has found that they tend to associate African Americans with threat. Little is known however about the stability of implicit racial bias in police officers, whose attitudes could be expected to fluctuate based on their day-to-day encounters or from internal stressors such as fatigue. To investigate, this study tested 80 police officers using the Weapons Implicit Association Test ({IAT}) on four separate occasions. Officers’ sleep was also monitored using wrist actigraphy. Officers’ {IAT} scores varied significantly across the testing days ( f = 2.36; df = 1.468; p {\textless} .05), and differences in {IAT} scores were associated with officers’ sleep ( f = 6.49; df = 1.468; p {\textless} .05). These findings indicate that implicit racial bias was not stable among officers, and that when officers slept less prior to testing they demonstrated stronger association between Black Americans and weapons. The implications of these findings within the current climate of police–citizen unrest are discussed.},
}

@article{lee_comparison_2013,
	title = {A Comparison of Implicit and Explicit Attitude Measures: An Application of the Implicit Association Test ({IAT}) to Fast Food Restaurant Brands},
	volume = {18},
	issn = {1083-5423},
	url = {https://www.ingentaconnect.com/content/10.3727/108354213X13645733247576},
	doi = {10.3727/108354213X13645733247576},
	shorttitle = {A Comparison of Implicit and Explicit Attitude Measures},
	abstract = {The study attempted to assess people's attitudes toward fast food restaurant brands by comparing the results of explicit and implicit attitude measures. A total of 60 college students (33 Americans and 27 Koreans) were recruited to complete both a self-report survey and the Implicit Association Test ({IAT}). Two known fast food restaurant brands were selected and the results of this study show that Korean respondents have inconsistent response patterns in explicit/implicit measures toward the two brands, while consistent patterns are found among Americans in the same context. In terms of the correlation results, it was observed that both explicit and implicit attitude measures are significantly correlated with the frequency of fast food restaurant visits ({FFRV}) among Koreans, whereas explicit liking was not correlated with {FFRV} in the significant correlations between other variables ({GEA} and {IA}) among Americans. The researchers' elaborations on the literature and the experimental study reveal several conclusions, recommendations, and implications.},
	pages = {119--131},
	number = {2},
	journaltitle = {Tourism Analysis},
	shortjournal = {Tourism Analysis},
	author = {Lee, Kwang-Ho and Kim, Dae-Young},
	urldate = {2024-02-22},
	date = {2013-03-01},
	langid = {english},
}

@article{karpinski_attitude_2005,
	author = {Karpinski, Andrew and Steinman, Ross B. and Hilton, James L.},
	title = {Attitude Importance as a Moderator of the Relationship Between Implicit and Explicit Attitude Measures},
	journaltitle = {Personality and Social Psychology Bulletin},
	shortjournal = {Pers Soc Psychol Bull},
	volume = {31},
	number = {7},
	pages = {949--962},
	date = {2005-07},
	issn = {0146-1672, 1552-7433},
	doi = {10.1177/0146167204273007},
	url = {http://journals.sagepub.com/doi/10.1177/0146167204273007},
	urldate = {2024-02-22},
	langid = {english},
	abstract = {The authors examined attitude importance as a moderator of the relationship between the Implicit Association Test ({IAT}) and explicit attitude measures. In Study 1 (N = 194), as ratings of attitude importance regarding the 2000 presidential election increased, the strength of the relationship between a Bush-Gore {IAT} and explicit attitude measures also increased. Study 2 provided a conceptual replication of these results using attitudes toward Coke and Pepsi (N = 112). In addition, across both studies, explicit attitude measures were better predictors of deliberative behaviors than {IAT} scores. In Study 3 (N = 77), the authors examined the role of elaboration as a mechanism by which attitude importance may moderate {IAT}-explicit attitude correlations. As predicted, increased elaboration resulted in stronger {IAT}-explicit attitude correlations. Other possible mechanisms by which attitude importance may moderate the {IAT}-explicit attitude relationship also are discussed.},
}

@inproceedings{mhatre_homogeneous_2004,
	venue = {Paris, France},
	title = {Homogeneous vs heterogeneous clustered sensor networks: a comparative study},
	volume = {6},
	isbn = {978-0-7803-8533-7},
	url = {http://ieeexplore.ieee.org/document/1313223/},
	doi = {10.1109/ICC.2004.1313223},
	shorttitle = {Homogeneous vs heterogeneous clustered sensor networks},
	eventtitle = {2004 {IEEE} International Conference on Communications},
	pages = {3646--3651},
	booktitle = {2004 {IEEE} International Conference on Communications},
	publisher = {{IEEE}},
	author = {Mhatre, V. and Rosenberg, C.},
	urldate = {2024-03-08},
	date = {2004},
}

@inproceedings{sharifian_hierarchical_1997,
	title = {Hierarchical spreading of activation},
	pages = {1--10},
	booktitle = {Proceedings of the Conference on Language, Cognition, and Interpretation},
	publisher = {{IAU} Press},
	location = {Isfahan},
	author = {Sharifian, Farzad and Samani, Ramin},
	date = {1997},
	keywords = {No DOI found},
}

@article{greenwald_measuring_1998,
	title = {Measuring individual differences in implicit cognition: The implicit association test},
	volume = {74},
	issn = {1939-1315, 0022-3514},
	url = {https://doi.apa.org/doi/10.1037/0022-3514.74.6.1464},
	doi = {10.1037/0022-3514.74.6.1464},
	shorttitle = {Measuring individual differences in implicit cognition},
	pages = {1464--1480},
	number = {6},
	journaltitle = {Journal of Personality and Social Psychology},
	shortjournal = {Journal of Personality and Social Psychology},
	author = {Greenwald, Anthony G. and {McGhee}, Debbie E. and Schwartz, Jordan L. K.},
	urldate = {2024-02-22},
	date = {1998},
	langid = {english},
}

@article{greenwald_understanding_2009,
	title = {Understanding and using the Implicit Association Test: {III}. Meta-analysis of predictive validity},
	volume = {97},
	issn = {1939-1315, 0022-3514},
	url = {https://doi.apa.org/doi/10.1037/a0015575},
	doi = {10.1037/a0015575},
	shorttitle = {Understanding and using the Implicit Association Test},
	pages = {17--41},
	number = {1},
	journaltitle = {Journal of Personality and Social Psychology},
	shortjournal = {Journal of Personality and Social Psychology},
	author = {Greenwald, Anthony G. and Poehlman, T. Andrew and Uhlmann, Eric Luis and Banaji, Mahzarin R.},
	urldate = {2024-02-22},
	date = {2009-07},
	langid = {english},
}

@article{lawlor_mendelian_2008,
	title = {Mendelian randomization: Using genes as instruments for making causal inferences in epidemiology},
	volume = {27},
	issn = {0277-6715, 1097-0258},
	url = {https://onlinelibrary.wiley.com/doi/10.1002/sim.3034},
	doi = {10.1002/sim.3034},
	shorttitle = {Mendelian randomization},
	abstract = {Observational epidemiological studies suffer from many potential biases, from confounding and from reverse causation, and this limits their ability to robustly identify causal associations. Several high‐profile situations exist in which randomized controlled trials of precisely the same intervention that has been examined in observational studies have produced markedly different findings. In other observational sciences, the use of instrumental variable ({IV}) approaches has been one approach to strengthening causal inferences in non‐experimental situations. The use of germline genetic variants that proxy for environmentally modifiable exposures as instruments for these exposures is one form of {IV} analysis that can be implemented within observational epidemiological studies. The method has been referred to as ‘Mendelian randomization’, and can be considered as analogous to randomized controlled trials. This paper outlines Mendelian randomization, draws parallels with {IV} methods, provides examples of implementation of the approach and discusses limitations of the approach and some methods for dealing with these. Copyright © 2007 John Wiley \& Sons, Ltd.},
	pages = {1133--1163},
	number = {8},
	journaltitle = {Statistics in Medicine},
	shortjournal = {Statistics in Medicine},
	author = {Lawlor, Debbie A. and Harbord, Roger M. and Sterne, Jonathan A. C. and Timpson, Nic and Davey Smith, George},
	urldate = {2024-02-22},
	date = {2008-04-15},
	langid = {english},
}

@misc{han_rational_2019,
	title = {Rational Impatience Admission Control in 5G-sliced Networks: Shall I Bide my Slice Opportunity?},
	url = {http://arxiv.org/abs/1809.06815},
	doi = {10.48550/arXiv.1809.06815},
	shorttitle = {Rational Impatience Admission Control in 5G-sliced Networks},
	abstract = {Recently, the specifications of the fifth generation (5G) of mobile networks have been released witnessing the industrial interest around the novel network slicing concept. This context is further enriched by the evolution of an emerging type of public cloud environment: the Slice-as-a-Service ({SlaaS}). Infrastructure providers or network operators deploy different admission strategy while processing network slice requests issued by infrastructure tenants based on service level agreements ({SLAs}) and current (and predicted) resource availability. However, when the service demand will reasonably get dense, congestions of slice requests may occur leading to long waiting periods. This may turn into impatient tenant behaviors that mitigate potential loss. This paper studies the rational strategies of impatient tenants waiting in queue-based slice admission control system, proving mathematically and empirically the benefits of allowing infrastructure provider to share its information with the upcoming tenants.},
	publisher = {{arXiv}},
	author = {Han, Bin and Feng, Di and Sciancalepore, Vincenzo and Schotten, Hans D.},
	urldate = {2024-02-22},
	date = {2019-12-04},
	eprinttype = {arxiv},
	eprint = {1809.06815},
	eprintclass = {cs.NI},
	keywords = {Computer Science - Networking and Internet Architecture},
}

@article{mayberry_neurolinguistic_2018,
	author = {Mayberry, Rachel I. and Davenport, Tristan and Roth, Austin and Halgren, Eric},
	title = {Neurolinguistic processing when the brain matures without language},
	journaltitle = {Cortex},
	shortjournal = {Cortex},
	volume = {99},
	pages = {390--403},
	date = {2018-02},
	issn = {00109452},
	doi = {10.1016/j.cortex.2017.12.011},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0010945217304197},
	urldate = {2024-02-22},
	langid = {english},
}

@article{zhang_algorithm_2023,
	title = {Algorithm for optimized {mRNA} design improves stability and immunogenicity},
	volume = {621},
	issn = {1476-4687},
	doi = {10.1038/s41586-023-06127-z},
	abstract = {Messenger {RNA} ({mRNA}) vaccines are being used to combat the spread of {COVID}-19, but they still exhibit critical limitations caused by {mRNA} instability and degradation, which are major obstacles for the storage, distribution and efficacy of the vaccine products. Increasing secondary structure lengthens {mRNA} half-life, which, together with optimal codons, improves protein expression. Therefore, a principled {mRNA} design algorithm must optimize both structural stability and codon usage. However, owing to synonymous codons, the {mRNA} design space is prohibitively large—for example, there are around $2.4 \times 10^{632}$ candidate {mRNA} sequences for the {SARS}-{CoV}-2 spike protein. This poses insurmountable computational challenges. Here we provide a simple and unexpected solution using the classical concept of lattice parsing in computational linguistics, where finding the optimal {mRNA} sequence is analogous to identifying the most likely sentence among similar-sounding alternatives. Our algorithm {LinearDesign} finds an optimal {mRNA} design for the spike protein in just 11 minutes, and can concurrently optimize stability and codon usage. {LinearDesign} substantially improves {mRNA} half-life and protein expression, and profoundly increases antibody titre by up to 128 times in mice compared to the codon-optimization benchmark on {mRNA} vaccines for {COVID}-19 and varicella-zoster virus. This result reveals the great potential of principled {mRNA} design and enables the exploration of previously unreachable but highly stable and efficient designs. Our work is a timely tool for vaccines and other {mRNA}-based medicines encoding therapeutic proteins such as monoclonal antibodies and anti-cancer drugs.},
	pages = {396--403},
	number = {7978},
	journaltitle = {Nature},
	shortjournal = {Nature},
	author = {Zhang, He and Zhang, Liang and Lin, Ang and Xu, Congcong and Li, Ziyu and Liu, Kaibo and Liu, Boxiang and Ma, Xiaopin and Zhao, Fanfan and Jiang, Huiling and Chen, Chunxiu and Shen, Haifa and Li, Hangwen and Mathews, David H. and Zhang, Yujian and Huang, Liang},
	date = {2023-09},
	pmid = {37130545},
	pmcid = {PMC10499610},
	keywords = {Humans, Animals, Mice, {RNA}, Messenger, Algorithms, {COVID}-19, {SARS}-{CoV}-2, Codon, {COVID}-19 Vaccines, Half-Life, Herpesvirus 3, Human, {mRNA} Vaccines, {RNA} Stability},
}

@article{von_bartheld_search_2016,
	title = {The search for true numbers of neurons and glial cells in the human brain: A review of 150 years of cell counting},
	volume = {524},
	issn = {1096-9861},
	doi = {10.1002/cne.24040},
	shorttitle = {The search for true numbers of neurons and glial cells in the human brain},
	abstract = {For half a century, the human brain was believed to contain about 100 billion neurons and one trillion glial cells, with a glia:neuron ratio of 10:1. A new counting method, the isotropic fractionator, has challenged the notion that glia outnumber neurons and revived a question that was widely thought to have been resolved. The recently validated isotropic fractionator demonstrates a glia:neuron ratio of less than 1:1 and a total number of less than 100 billion glial cells in the human brain. A survey of original evidence shows that histological data always supported a 1:1 ratio of glia to neurons in the entire human brain, and a range of 40--130 billion glial cells. We review how the claim of one trillion glial cells originated, was perpetuated, and eventually refuted. We compile how numbers of neurons and glial cells in the adult human brain were reported and we examine the reasons for an erroneous consensus about the relative abundance of glial cells in human brains that persisted for half a century. Our review includes a brief history of cell counting in human brains, types of counting methods that were and are employed, ranges of previous estimates, and the current status of knowledge about the number of cells. We also discuss implications and consequences of the new insights into true numbers of glial cells in the human brain, and the promise and potential impact of the newly validated isotropic fractionator for reliable quantification of glia and neurons in neurological and psychiatric diseases.},
	pages = {3865--3895},
	number = {18},
	journaltitle = {The Journal of Comparative Neurology},
	shortjournal = {J Comp Neurol},
	author = {von Bartheld, Christopher S. and Bahney, Jami and Herculano-Houzel, Suzana},
	date = {2016-12-15},
	pmid = {27187682},
	pmcid = {PMC5063692},
	keywords = {Humans, Animals, Brain, Neurons, Cell Count, History, 19th Century, History, 20th Century, Neuroglia, cell counts, glia number, glia-neuron ratio, history, History, 21st Century, human brain, neuron number, quantification},
}

@article{herculano_houzel_human_2009,
	author = {Herculano-Houzel, Suzana},
	title = {The human brain in numbers: a linearly scaled-up primate brain},
	shorttitle = {The human brain in numbers},
	journaltitle = {Frontiers in Human Neuroscience},
	shortjournal = {Front Hum Neurosci},
	volume = {3},
	pages = {31},
	date = {2009},
	issn = {1662-5161},
	doi = {10.3389/neuro.09.031.2009},
	pmid = {19915731},
	pmcid = {PMC2776484},
	keywords = {brain scaling, encephalization, human, number of neurons},
	abstract = {The human brain has often been viewed as outstanding among mammalian brains: the most cognitively able, the largest-than-expected from body size, endowed with an overdeveloped cerebral cortex that represents over 80\% of brain mass, and purportedly containing 100 billion neurons and 10x more glial cells. Such uniqueness was seemingly necessary to justify the superior cognitive abilities of humans over larger-brained mammals such as elephants and whales. However, our recent studies using a novel method to determine the cellular composition of the brain of humans and other primates as well as of rodents and insectivores show that, since different cellular scaling rules apply to the brains within these orders, brain size can no longer be considered a proxy for the number of neurons in the brain. These studies also showed that the human brain is not exceptional in its cellular composition, as it was found to contain as many neuronal and non-neuronal cells as would be expected of a primate brain of its size. Additionally, the so-called overdeveloped human cerebral cortex holds only 19\% of all brain neurons, a fraction that is similar to that found in other mammals. In what regards absolute numbers of neurons, however, the human brain does have two advantages compared to other mammalian brains: compared to rodents, and probably to whales and elephants as well, it is built according to the very economical, space-saving scaling rules that apply to other primates; and, among economically built primate brains, it is the largest, hence containing the most neurons. These findings argue in favor of a view of cognitive abilities that is centered on absolute numbers of neurons, rather than on body size or encephalization, and call for a re-examination of several concepts related to the exceptionality of the human brain.},
}

@article{pareti_all_or_none_2007,
	title = {The ``all-or-none'' law in skeletal muscle and nerve fibres},
	volume = {145},
	issn = {0003-9829},
	doi = {10.4449/AIB.V145I1.865},
	abstract = {In 1905 the Cambridge physiologist Keith Lucas extended the ``all-or-none'' principle (introduced by H. P. Bowditch for the cardiac tissue) to skeletal muscle and nerve fibres. Nevertheless, in a short time it was clear that nerve fibres obey this law, but also that frequency of discharge is another relevant factor in the nervous conduction.},
	pages = {39--54},
	number = {1},
	journaltitle = {Archives Italiennes de Biologie},
	shortjournal = {Arch Ital Biol},
	author = {Pareti, G.},
	date = {2007-01},
	pmid = {17274183},
	keywords = {Humans, Animals, Neuromuscular Junction, Muscle, Skeletal, Germany, History, 19th Century, History, 20th Century, Models, Animal, Motor Neurons, Muscle Contraction, Neural Conduction, Neurophysiology, Peripheral Nerves, United Kingdom},
}

@article{wolosker_d_amino_2008,
	title = {D-amino acids in the brain: D-serine in neurotransmission and neurodegeneration},
	volume = {275},
	issn = {1742-464X},
	doi = {10.1111/j.1742-4658.2008.06515.x},
	shorttitle = {D-amino acids in the brain},
	abstract = {The mammalian brain contains unusually high levels of D-serine, a D-amino acid previously thought to be restricted to some bacteria and insects. In the last few years, studies from several groups have demonstrated that D-serine is a physiological co-agonist of the N-methyl D-aspartate ({NMDA}) type of glutamate receptor -- a key excitatory neurotransmitter receptor in the brain. D-Serine binds with high affinity to a co-agonist site at the {NMDA} receptors and, along with glutamate, mediates several important physiological and pathological processes, including {NMDA} receptor transmission, synaptic plasticity and neurotoxicity. In recent years, biosynthetic, degradative and release pathways for D-serine have been identified, indicating that D-serine may function as a transmitter. At first, D-serine was described in astrocytes, a class of glial cells that ensheathes neurons and release several transmitters that modulate neurotransmission. This led to the notion that D-serine is a glia-derived transmitter (or gliotransmitter). However, recent data indicate that serine racemase, the D-serine biosynthetic enzyme, is widely expressed in neurons of the brain, suggesting that D-serine also has a neuronal origin. We now review these findings, focusing on recent questions regarding the roles of glia versus neurons in d-serine signaling.},
	pages = {3514--3526},
	number = {14},
	journaltitle = {The {FEBS} Journal},
	shortjournal = {{FEBS} J},
	author = {Wolosker, Herman and Dumin, Elena and Balan, Livia and Foltyn, Veronika N.},
	date = {2008-07},
	pmid = {18564180},
	keywords = {Animals, Brain, Neurons, Neurodegenerative Diseases, Rats, Receptors, N-Methyl-D-Aspartate, Serine, Synaptic Transmission, Neuroglia, Isomerism},
}

@article{allen_glia_2018,
	title = {Glia as architects of central nervous system formation and function},
	volume = {362},
	issn = {1095-9203},
	doi = {10.1126/science.aat0473},
	abstract = {Glia constitute roughly half of the cells of the central nervous system ({CNS}) but were long-considered to be static bystanders to its formation and function. Here we provide an overview of how the diverse and dynamic functions of glial cells orchestrate essentially all aspects of nervous system formation and function. Radial glia, astrocytes, oligodendrocyte progenitor cells, oligodendrocytes, and microglia each influence nervous system development, from neuronal birth, migration, axon specification, and growth through circuit assembly and synaptogenesis. As neural circuits mature, distinct glia fulfill key roles in synaptic communication, plasticity, homeostasis, and network-level activity through dynamic monitoring and alteration of {CNS} structure and function. Continued elucidation of glial cell biology, and the dynamic interactions of neurons and glia, will enrich our understanding of nervous system formation, health, and function.},
	pages = {181--185},
	number = {6411},
	journaltitle = {Science},
	shortjournal = {Science},
	author = {Allen, Nicola J. and Lyons, David A.},
	date = {2018-10-12},
	pmid = {30309945},
	pmcid = {PMC6292669},
	keywords = {Animals, Mice, Neurons, Central Nervous System, Neuronal Plasticity, Cell Movement, Synapses, Synaptic Transmission, Neuroglia, Neurogenesis},
}

@article{yang_neuroinflammation_2019,
	author = {Yang, Qiao-Qiao and Zhou, Jia-Wei},
	title = {Neuroinflammation in the central nervous system: Symphony of glial cells},
	shorttitle = {Neuroinflammation in the central nervous system},
	journaltitle = {Glia},
	shortjournal = {Glia},
	volume = {67},
	number = {6},
	pages = {1017--1035},
	date = {2019-06},
	issn = {1098-1136},
	doi = {10.1002/glia.23571},
	pmid = {30548343},
	keywords = {Humans, Animals, Inflammation, Central Nervous System Diseases, Central Nervous System, Neurodegenerative Diseases, neuroinflammation, balance, {CNS}, glial cells, Inflammation Mediators, interaction, Neuroglia, neuroimmunology},
	abstract = {Neuroinflammation in the central nervous system ({CNS}) is an important subject of neuroimmunological research. Emerging evidence suggests that neuroinflammation is a key player in various neurological disorders, including neurodegenerative diseases and {CNS} injury. Neuroinflammation is a complex and well-orchestrated process by various groups of glial cells in {CNS} and peripheral immune cells. The cross-talks between various groups of glial cells in {CNS} neuroinflammation is an extremely complex and dynamic process which resembles a well-orchestrated symphony. However, the understanding of how glial cells interact with each other to shape the distinctive immune responses of the {CNS} remains limited. In this review, we will discuss the joint actions of glial cells in three phases of neuroinflammation, including initiation, progression, and prognosis, the three movements of the symphony, as the role of each type of glial cells in neuroinflammation depends on the nature of inflammatory cues and specific course of diseases. This perspective of glial cells in neuroinflammation might provide helpful clues to the development of the early diagnosis and therapeutic intervention of the various {CNS} diseases.},
}

@article{kim_neuron_glia_2020,
	author = {Kim, Yoo Sung and Choi, Juwon and Yoon, Bo-Eun},
	title = {Neuron-Glia Interactions in Neurodevelopmental Disorders},
	journaltitle = {Cells},
	shortjournal = {Cells},
	volume = {9},
	number = {10},
	pages = {2176},
	date = {2020-09-27},
	issn = {2073-4409},
	doi = {10.3390/cells9102176},
	pmid = {32992620},
	pmcid = {PMC7601502},
	keywords = {Humans, Astrocytes, Neurons, Neuroglia, {ADHD}, {ASD}, epilepsy, Homeostasis, neurodevelopmental disorder, Neurodevelopmental Disorders, Neurogenesis, neuron-glia interactions},
	abstract = {Recent studies have revealed synaptic dysfunction to be a hallmark of various psychiatric diseases, and that glial cells participate in synapse formation, development, and plasticity. Glial cells contribute to neuroinflammation and synaptic homeostasis, the latter being essential for maintaining the physiological function of the central nervous system ({CNS}). In particular, glial cells undergo gliotransmission and regulate neuronal activity in tripartite synapses via ion channels (gap junction hemichannel, volume regulated anion channel, and bestrophin-1), receptors (for neurotransmitters and cytokines), or transporters ({GLT}-1, {GLAST}, and {GATs}) that are expressed on glial cell membranes. In this review, we propose that dysfunction in neuron-glia interactions may contribute to the pathogenesis of neurodevelopmental disorders. Understanding the mechanisms of neuron-glia interaction for synapse formation and maturation will contribute to the development of novel therapeutic targets of neurodevelopmental disorders.},
}

@article{agatonovic_kustrin_basic_2000,
	title = {Basic concepts of artificial neural network ({ANN}) modeling and its application in pharmaceutical research},
	volume = {22},
	issn = {0731-7085},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0731708599002721},
	doi = {10.1016/S0731-7085(99)00272-1},
	pages = {717--727},
	number = {5},
	journaltitle = {Journal of Pharmaceutical and Biomedical Analysis},
	shortjournal = {Journal of Pharmaceutical and Biomedical Analysis},
	author = {Agatonovic-Kustrin, S. and Beresford, R.},
	urldate = {2024-02-09},
	date = {2000-06},
	langid = {english},
}

@article{parisi_artificial_1997,
	title = {Artificial Life and Higher Level Cognition},
	volume = {34},
	issn = {0278-2626},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0278262697909118},
	doi = {10.1006/brcg.1997.0911},
	pages = {160--184},
	number = {1},
	journaltitle = {Brain and Cognition},
	shortjournal = {Brain and Cognition},
	author = {Parisi, Domenico},
	urldate = {2024-02-22},
	date = {1997-06},
	langid = {english},
}

@article{zahedi_introduction_1991,
	author = {Zahedi, Fatemeh},
	title = {An Introduction to Neural Networks and a Comparison with Artificial Intelligence and Expert Systems},
	journaltitle = {Interfaces},
	shortjournal = {Interfaces},
	volume = {21},
	number = {2},
	pages = {25--38},
	date = {1991-04},
	issn = {0092-2102, 1526-551X},
	doi = {10.1287/inte.21.2.25},
	url = {https://pubsonline.informs.org/doi/10.1287/inte.21.2.25},
	urldate = {2024-02-22},
	langid = {english},
	abstract = {Artificial intelligence (including expert systems) ({AI}/{ES}) and neural networks ({NN}) provide methods for formalizing qualitative aspects of business systems. They complement quantitative methods in solving business problems. While {AI} and {NN} have the common goal of simulating human intelligence, they use different methods. {AI}/{ES} assumes the brain is a black box and imitates the human reasoning process. It processes knowledge sequentially, represents it explicitly, and mostly uses deductive reasoning. Learning takes place outside the system.
            {NN} treats the brain as a white box and imitates its structure and function, using a parallel approach to simulate human intelligence. It represents knowledge implicitly within its structure and applies inductive reasoning to process knowledge. Learning takes place within the system. Both {AI}/{ES} and {NN} have great potential to solve qualitative problems, and their integration could provide a powerful tool for dealing with problems outside the domain of current problem-solving methods.},
}

@article{cichy_deep_2019,
	title = {Deep Neural Networks as Scientific Models},
	volume = {23},
	issn = {1364-6613},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1364661319300348},
	doi = {10.1016/j.tics.2019.01.009},
	pages = {305--317},
	number = {4},
	journaltitle = {Trends in Cognitive Sciences},
	shortjournal = {Trends Cogn Sci},
	author = {Cichy, Radoslaw M. and Kaiser, Daniel},
	urldate = {2024-02-09},
	date = {2019-04},
	langid = {english},
}

@article{pichler_machine_2023,
	title = {Machine learning and deep learning—A review for ecologists},
	volume = {14},
	issn = {2041-210X},
	url = {https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.14061},
	doi = {10.1111/2041-210X.14061},
	abstract = {The popularity of machine learning ({ML}), deep learning ({DL}) and artificial intelligence ({AI}) has risen sharply in recent years. Despite this spike in popularity, the inner workings of {ML} and {DL} algorithms are often perceived as opaque, and their relationship to classical data analysis tools remains debated. Although it is often assumed that {ML} and {DL} excel primarily at making predictions, {ML} and {DL} can also be used for analytical tasks traditionally addressed with statistical models. Moreover, most recent discussions and reviews on {ML} focus mainly on {DL}, failing to synthesise the wealth of {ML} algorithms with different advantages and general principles. Here, we provide a comprehensive overview of the field of {ML} and {DL}, starting by summarizing its historical developments, existing algorithm families, differences to traditional statistical tools, and universal {ML} principles. We then discuss why and when {ML} and {DL} models excel at prediction tasks and where they could offer alternatives to traditional statistical methods for inference, highlighting current and emerging applications for ecological problems. Finally, we summarize emerging trends such as scientific and causal {ML}, explainable {AI}, and responsible {AI} that may significantly impact ecological data analysis in the future. We conclude that {ML} and {DL} are powerful new tools for predictive modelling and data analysis. The superior performance of {ML} and {DL} algorithms compared to statistical models can be explained by their higher flexibility and automatic data‐dependent complexity optimization. However, their use for causal inference is still disputed as the focus of {ML} and {DL} methods on predictions creates challenges for the interpretation of these models. Nevertheless, we expect {ML} and {DL} to become an indispensable tool in ecology and evolution, comparable to other traditional statistical tools.},
	pages = {994--1016},
	number = {4},
	journaltitle = {Methods in Ecology and Evolution},
	shortjournal = {Methods Ecol Evol},
	author = {Pichler, Maximilian and Hartig, Florian},
	urldate = {2024-02-22},
	date = {2023-04},
	langid = {english},
}

@article{chavlis_drawing_2021,
	author = {Chavlis, Spyridon and Poirazi, Panayiota},
	title = {Drawing inspiration from biological dendrites to empower artificial neural networks},
	journaltitle = {Current Opinion in Neurobiology},
	shortjournal = {Curr Opin Neurobiol},
	volume = {70},
	pages = {1--10},
	date = {2021-10},
	issn = {1873-6882},
	doi = {10.1016/j.conb.2021.04.007},
	pmid = {34087540},
	keywords = {Dendrites, Neurons, Models, Neurological, Machine Learning, Neural Networks, Computer},
	abstract = {This article highlights specific features of biological neurons and their dendritic trees, whose adoption may help advance artificial neural networks used in various machine learning applications. Advancements could take the form of increased computational capabilities and/or reduced power consumption. Proposed features include dendritic anatomy, dendritic nonlinearities, and compartmentalized plasticity rules, all of which shape learning and information processing in biological networks. We discuss the computational benefits provided by these features in biological neurons and suggest ways to adopt them in artificial neurons in order to exploit the respective benefits in machine learning.},
}

@article{whittington_theories_2019,
	author = {Whittington, James C. R. and Bogacz, Rafal},
	title = {Theories of Error Back-Propagation in the Brain},
	journaltitle = {Trends in Cognitive Sciences},
	shortjournal = {Trends Cogn Sci},
	volume = {23},
	number = {3},
	pages = {235--250},
	date = {2019-03},
	issn = {1879-307X},
	doi = {10.1016/j.tics.2018.12.005},
	pmid = {30704969},
	pmcid = {PMC6382460},
	keywords = {Humans, Brain, synaptic plasticity, Neural Networks, Computer, Models, Theoretical, Nerve Net, deep learning, neural networks, predictive coding},
	abstract = {This review article summarises recently proposed theories on how neural circuits in the brain could approximate the error back-propagation algorithm used by artificial neural networks. Computational models implementing these theories achieve learning as efficient as artificial neural networks, but they use simple synaptic plasticity rules based on activity of presynaptic and postsynaptic neurons. The models have similarities, such as including both feedforward and feedback connections, allowing information about error to propagate throughout the network. Furthermore, they incorporate experimental evidence on neural connectivity, responses, and plasticity. These models provide insights on how brain networks might be organised such that modification of synaptic weights on multiple levels of cortical hierarchy leads to improved performance on tasks.},
}

@article{lillicrap_backpropagation_2020,
	title = {Backpropagation and the brain},
	volume = {21},
	issn = {1471-0048},
	doi = {10.1038/s41583-020-0277-3},
	abstract = {During learning, the brain modifies synapses to improve behaviour. In the cortex, synapses are embedded within multilayered networks, making it difficult to determine the effect of an individual synaptic modification on the behaviour of the system. The backpropagation algorithm solves this problem in deep artificial neural networks, but historically it has been viewed as biologically problematic. Nonetheless, recent developments in neuroscience and the successes of artificial neural networks have reinvigorated interest in whether backpropagation offers insights for understanding learning in the cortex. The backpropagation algorithm learns quickly by computing synaptic updates using feedback connections to deliver error signals. Although feedback connections are ubiquitous in the cortex, it is difficult to see how they could deliver the error signals required by strict formulations of backpropagation. Here we build on past and recent developments to argue that feedback connections may instead induce neural activities whose differences can be used to locally approximate these signals and hence drive effective learning in deep networks in the brain.},
	pages = {335--346},
	number = {6},
	journaltitle = {Nature Reviews Neuroscience},
	shortjournal = {Nat Rev Neurosci},
	author = {Lillicrap, Timothy P. and Santoro, Adam and Marris, Luke and Akerman, Colin J. and Hinton, Geoffrey},
	date = {2020-06},
	pmid = {32303713},
	keywords = {Humans, Animals, Models, Neurological, Cerebral Cortex, Neural Networks, Computer, Learning, Algorithms, Feedback},
}

@article{aru_cellular_2020,
	title = {Cellular Mechanisms of Conscious Processing},
	volume = {24},
	issn = {1364-6613},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1364661320301753},
	doi = {10.1016/j.tics.2020.07.006},
	pages = {814--825},
	number = {10},
	journaltitle = {Trends in Cognitive Sciences},
	shortjournal = {Trends Cogn Sci},
	author = {Aru, Jaan and Suzuki, Mototaka and Larkum, Matthew E.},
	urldate = {2024-02-22},
	date = {2020-10},
	langid = {english},
}

@article{munakata_hebbian_2004,
	author = {Munakata, Yuko and Pfaffly, Jason},
	title = {Hebbian learning and development},
	journaltitle = {Developmental Science},
	shortjournal = {Developmental Science},
	volume = {7},
	number = {2},
	pages = {141--148},
	date = {2004-04},
	issn = {1363-755X, 1467-7687},
	doi = {10.1111/j.1467-7687.2004.00331.x},
	url = {https://onlinelibrary.wiley.com/doi/10.1111/j.1467-7687.2004.00331.x},
	urldate = {2024-02-22},
	langid = {english},
	abstract = {Hebbian learning is a biologically plausible and ecologically valid learning mechanism. In Hebbian learning, ‘units that fire together, wire together’. Such learning may occur at the neural level in terms of long‐term potentiation ({LTP}) and long‐term depression ({LTD}). Many features of Hebbian learning are relevant to developmental theorizing, including its self‐organizing nature and its ability to extract statistical regularities from the environment. Hebbian learning mechanisms may also play an important role in critical periods during development, and in a number of other developmental phenomena.},
}

@article{botvinick_reinforcement_2019,
	title = {Reinforcement Learning, Fast and Slow},
	volume = {23},
	issn = {1879-307X},
	doi = {10.1016/j.tics.2019.02.006},
	abstract = {Deep reinforcement learning ({RL}) methods have driven impressive advances in artificial intelligence in recent years, exceeding human performance in domains ranging from Atari to Go to no-limit poker. This progress has drawn the attention of cognitive scientists interested in understanding human learning. However, the concern has been raised that deep {RL} may be too sample-inefficient -- that is, it may simply be too slow -- to provide a plausible model of how humans learn. In the present review, we counter this critique by describing recently developed techniques that allow deep {RL} to operate more nimbly, solving problems much more quickly than previous methods. Although these techniques were developed in an {AI} context, we propose that they may have rich implications for psychology and neuroscience. A key insight, arising from these {AI} methods, concerns the fundamental connection between fast {RL} and slower, more incremental forms of learning.},
	pages = {408--422},
	number = {5},
	journaltitle = {Trends in Cognitive Sciences},
	shortjournal = {Trends Cogn Sci},
	author = {Botvinick, Matthew and Ritter, Sam and Wang, Jane X. and Kurth-Nelson, Zeb and Blundell, Charles and Hassabis, Demis},
	date = {2019-05},
	pmid = {31003893},
	keywords = {Humans, Animals, Time Factors, Neural Networks, Computer, Artificial Intelligence, Memory, Episodic, Reinforcement, Psychology},
}

@article{halassa_integrated_2010,
	author = {Halassa, Michael M. and Haydon, Philip G.},
	title = {Integrated brain circuits: astrocytic networks modulate neuronal activity and behavior},
	shorttitle = {Integrated brain circuits},
	journaltitle = {Annual Review of Physiology},
	shortjournal = {Annu Rev Physiol},
	volume = {72},
	pages = {335--355},
	date = {2010},
	issn = {1545-1585},
	doi = {10.1146/annurev-physiol-021909-135843},
	pmid = {20148679},
	pmcid = {PMC3117429},
	keywords = {Humans, Animals, Astrocytes, Neurons, Neurotransmitter Agents, Nervous System Diseases, Sleep, Adenosine Triphosphate, Behavior, Animal, Calcium Signaling, Glutamic Acid, Nerve Net, Purines, Receptors, N-Methyl-D-Aspartate, Serine, Synapses, Synaptic Transmission},
	abstract = {The past decade has seen an explosion of research on roles of neuron-astrocyte interactions in the control of brain function. We highlight recent studies performed on the tripartite synapse, the structure consisting of pre- and postsynaptic elements of the synapse and an associated astrocytic process. Astrocytes respond to neuronal activity and neurotransmitters, through the activation of metabotropic receptors, and can release the gliotransmitters {ATP}, d-serine, and glutamate, which act on neurons. Astrocyte-derived {ATP} modulates synaptic transmission, either directly or through its metabolic product adenosine. d-serine modulates {NMDA} receptor function, whereas glia-derived glutamate can play important roles in relapse following withdrawal from drugs of abuse. Cell type-specific molecular genetics has allowed a new level of examination of the function of astrocytes in brain function and has revealed an important role of these glial cells that is mediated by adenosine accumulation in the control of sleep and in cognitive impairments that follow sleep deprivation.},
}

@article{changeux_theory_1973,
	title = {A theory of the epigenesis of neuronal networks by selective stabilization of synapses},
	volume = {70},
	issn = {0027-8424},
	doi = {10.1073/pnas.70.10.2974},
	abstract = {A formalism is introduced to represent the connective organization of an evolving neuronal network and the effects of environment on this organization by stabilization or degeneration of labile synapses associated with functioning. Learning, or the acquisition of an associative property, is related to a characteristic variability of the connective organization: the interaction of the environment with the genetic program is printed as a particular pattern of such organization through neuronal functioning. An application of the theory to the development of the neuromuscular junction is proposed and the basic selective aspect of learning emphasized.},
	pages = {2974--2978},
	number = {10},
	journaltitle = {Proceedings of the National Academy of Sciences of the United States of America},
	shortjournal = {Proc Natl Acad Sci U S A},
	author = {Changeux, Jean-Pierre and Courrège, Philippe and Danchin, Antoine},
	date = {1973-10},
	pmid = {4517949},
	pmcid = {PMC427150},
	keywords = {Neuromuscular Junction, Models, Neurological, Learning, Synapses, Environment},
}

@article{volzhenin_multilevel_2022,
	author = {Volzhenin, Konstantin and Changeux, Jean-Pierre and Dumas, Guillaume},
	title = {Multilevel development of cognitive abilities in an artificial neural network},
	journaltitle = {Proceedings of the National Academy of Sciences},
	shortjournal = {Proc. Natl. Acad. Sci. U.S.A.},
	volume = {119},
	number = {39},
	pages = {e2201304119},
	date = {2022-09-27},
	issn = {0027-8424, 1091-6490},
	doi = {10.1073/pnas.2201304119},
	url = {https://pnas.org/doi/full/10.1073/pnas.2201304119},
	urldate = {2024-02-22},
	langid = {english},
	abstract = {Several neuronal mechanisms have been proposed to account for the formation of cognitive abilities through postnatal interactions with the physical and sociocultural environment. Here, we introduce a three-level computational model of information processing and acquisition of cognitive abilities. We propose minimal architectural requirements to build these levels, and how the parameters affect their performance and relationships. The first sensorimotor level handles local nonconscious processing, here during a visual classification task. The second level or cognitive level globally integrates the information from multiple local processors via long-ranged connections and synthesizes it in a global, but still nonconscious, manner. The third and cognitively highest level handles the information globally and consciously. It is based on the global neuronal workspace ({GNW}) theory and is referred to as the conscious level. We use the trace and delay conditioning tasks to, respectively, challenge the second and third levels. Results first highlight the necessity of epigenesis through the selection and stabilization of synapses at both local and global scales to allow the network to solve the first two tasks. At the global scale, dopamine appears necessary to properly provide credit assignment despite the temporal delay between perception and reward. At the third level, the presence of interneurons becomes necessary to maintain a self-sustained representation within the {GNW} in the absence of sensory input. Finally, while balanced spontaneous intrinsic activity facilitates epigenesis at both local and global scales, the balanced excitatory/inhibitory ratio increases performance. We discuss the plausibility of the model in both neurodevelopmental and artificial intelligence terms.},
}

@article{desislavov_trends_2023,
	title = {Trends in {AI} inference energy consumption: Beyond the performance-vs-parameter laws of deep learning},
	volume = {38},
	issn = {2210-5379},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S2210537923000124},
	doi = {10.1016/j.suscom.2023.100857},
	shorttitle = {Trends in {AI} inference energy consumption},
	pages = {100857},
	journaltitle = {Sustainable Computing: Informatics and Systems},
	shortjournal = {Sustainable Computing: Informatics and Systems},
	author = {Desislavov, Radosvet and Martínez-Plumed, Fernando and Hernández-Orallo, José},
	urldate = {2024-03-08},
	date = {2023-04},
	langid = {english},
}

@article{attwell_energy_2001,
	author = {Attwell, David and Laughlin, Simon B.},
	title = {An Energy Budget for Signaling in the Grey Matter of the Brain},
	journaltitle = {Journal of Cerebral Blood Flow \& Metabolism},
	shortjournal = {J Cereb Blood Flow Metab},
	volume = {21},
	number = {10},
	pages = {1133--1145},
	date = {2001-10},
	issn = {0271-678X, 1559-7016},
	doi = {10.1097/00004647-200110000-00001},
	pmid = {11598490},
	url = {http://journals.sagepub.com/doi/10.1097/00004647-200110000-00001},
	urldate = {2024-03-08},
	langid = {english},
	abstract = {Anatomic and physiologic data are used to analyze the energy expenditure on different components of excitatory signaling in the grey matter of rodent brain. Action potentials and postsynaptic effects of glutamate are predicted to consume much of the energy (47\% and 34\%, respectively), with the resting potential consuming a smaller amount (13\%), and glutamate recycling using only 3\%. Energy usage depends strongly on action potential rate—an increase in activity of 1 action potential/cortical neuron/s will raise oxygen consumption by 145 {mL}/100 g grey matter/h. The energy expended on signaling is a large fraction of the total energy used by the brain; this favors the use of energy efficient neural codes and wiring patterns. Our estimates of energy usage predict the use of distributed codes, with ≤15\% of neurons simultaneously active, to reduce energy consumption and allow greater computing power from a fixed number of neurons. Functional magnetic resonance imaging signals are likely to be dominated by changes in energy usage associated with synaptic currents and action potential propagation.},
}

@misc{vaswani_attention_2023,
	title = {Attention Is All You Need},
	url = {http://arxiv.org/abs/1706.03762},
	doi = {10.48550/arXiv.1706.03762},
	abstract = {The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 {BLEU} on the {WMT} 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 {BLEU}. On the {WMT} 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art {BLEU} score of 41.8 after training for 3.5 days on eight {GPUs}, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.},
	number = {{arXiv}:1706.03762},
	publisher = {{arXiv}},
	author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia},
	urldate = {2024-02-09},
	date = {2023-08-01},
	eprinttype = {arxiv},
	eprint = {1706.03762},
	eprintclass = {cs},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
}

@article{cowan_capacity_2005,
	title = {On the capacity of attention: Its estimation and its role in working memory and cognitive aptitudes},
	volume = {51},
	issn = {0010-0285},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0010028505000022},
	doi = {10.1016/j.cogpsych.2004.12.001},
	shorttitle = {On the capacity of attention},
	pages = {42--100},
	number = {1},
	journaltitle = {Cognitive Psychology},
	shortjournal = {Cognitive Psychology},
	author = {Cowan, Nelson and Elliott, Emily M. and Saults, J. Scott and Morey, Candice C. and Mattox, Sam and Hismjatullina, Anna and Conway, Andrew R. A.},
	urldate = {2024-02-22},
	date = {2005-08},
	langid = {english},
	file = {Accepted Version:C\:\\Users\\syc\\Zotero\\storage\\TIPXENIA\\Cowan et al. - 2005 - On the capacity of attention Its estimation and i.pdf:application/pdf},
}
#54
@article{cowan_magical_2001,
	title = {The magical number 4 in short-term memory: a reconsideration of mental storage capacity},
	volume = {24},
	issn = {0140-525X},
	doi = {10.1017/s0140525x01003922},
	shorttitle = {The magical number 4 in short-term memory},
	abstract = {Miller (1956) summarized evidence that people can remember about seven chunks in short-term memory ({STM}) tasks. However, that number was meant more as a rough estimate and a rhetorical device than as a real capacity limit. Others have since suggested that there is a more precise capacity limit, but that it is only three to five chunks. The present target article brings together a wide variety of data on capacity limits suggesting that the smaller capacity limit is real. Capacity limits will be useful in analyses of information processing only if the boundary conditions for observing them can be carefully described. Four basic conditions in which chunks can be identified and capacity limits can accordingly be observed are: (1) when information overload limits chunks to individual stimulus items, (2) when other steps are taken specifically to block the recording of stimulus items into larger chunks, (3) in performance discontinuities caused by the capacity limit, and (4) in various indirect effects of the capacity limit. Under these conditions, rehearsal and long-term memory cannot be used to combine stimulus items into chunks of an unknown size; nor can storage mechanisms that are not capacity-limited, such as sensory memory, allow the capacity-limited storage mechanism to be refilled during recall. A single, central capacity limit averaging about four chunks is implicated along with other, noncapacity-limited sources. The pure {STM} capacity limit expressed in chunks is distinguished from compound {STM} limits obtained when the number of separately held chunks is unclear. Reasons why pure capacity estimates fall within a narrow range are discussed and a capacity limit for the focus of attention is proposed.},
	pages = {87--114},
	note = {Discussion: 114--185},
	number = {1},
	journaltitle = {The Behavioral and Brain Sciences},
	shortjournal = {Behav Brain Sci},
	author = {Cowan, Nelson},
	date = {2001-02},
	pmid = {11515286},
	keywords = {Humans, Brain, Cognition, Schizophrenia, Psychological Theory, Functional Laterality, Learning, Attention, Linguistics, Mathematics, Memory, Short-Term, Visual Fields},
}

@article{polti_effect_2018,
	title = {The effect of attention and working memory on the estimation of elapsed time},
	volume = {8},
	issn = {2045-2322},
	url = {https://www.nature.com/articles/s41598-018-25119-y},
	doi = {10.1038/s41598-018-25119-y},
	abstract = {Psychological models of time perception involve attention and memory: while attention typically regulates the flow of events, memory maintains timed events or intervals. The precise, and possibly distinct, roles of attention and memory in time perception remain debated. In this behavioral study, we tested 48 participants in a prospective duration estimation task while they fully attended to time or performed a working memory ({WM}) task. We report that paying attention to time lengthened perceived duration in the range of seconds to minutes, whereas diverting attention away from time shortened perceived duration. The overestimation due to attending to time did not scale with durations. To the contrary, increasing {WM} load systematically decreased subjective duration and this effect scaled with durations. Herein, we discuss the dissociation between attention and {WM} in timing and scalar variability from the perspective of Bayesian models of time estimations.},
	pages = {6690},
	number = {1},
	journaltitle = {Scientific Reports},
	shortjournal = {Sci Rep},
	author = {Polti, Ignacio and Martin, Benoît and Van Wassenhove, Virginie},
	urldate = {2024-02-22},
	date = {2018-04-27},
	langid = {english},
}

@article{buschman_behavior_2015,
	title = {From Behavior to Neural Dynamics: An Integrated Theory of Attention},
	volume = {88},
	issn = {0896-6273},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0896627315007746},
	doi = {10.1016/j.neuron.2015.09.017},
	shorttitle = {From Behavior to Neural Dynamics},
	pages = {127--144},
	number = {1},
	journaltitle = {Neuron},
	shortjournal = {Neuron},
	author = {Buschman, Timothy J. and Kastner, Sabine},
	urldate = {2024-02-22},
	date = {2015-10},
	langid = {english},
}

@article{chun_interactions_2007,
	title = {Interactions between attention and memory},
	volume = {17},
	issn = {0959-4388},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0959438807000360},
	doi = {10.1016/j.conb.2007.03.005},
	pages = {177--184},
	number = {2},
	journaltitle = {Current Opinion in Neurobiology},
	shortjournal = {Current Opinion in Neurobiology},
	author = {Chun, Marvin M. and Turk-Browne, Nicholas B.},
	urldate = {2024-02-22},
	date = {2007-04},
	langid = {english},
}

@article{wiley_working_2012,
	author = {Wiley, Jennifer and Jarosz, Andrew F.},
	title = {Working Memory Capacity, Attentional Focus, and Problem Solving},
	journaltitle = {Current Directions in Psychological Science},
	shortjournal = {Curr Dir Psychol Sci},
	volume = {21},
	number = {4},
	pages = {258--262},
	date = {2012-08},
	issn = {0963-7214, 1467-8721},
	doi = {10.1177/0963721412447622},
	url = {http://journals.sagepub.com/doi/10.1177/0963721412447622},
	urldate = {2024-02-22},
	langid = {english},
	abstract = {Attentional focus is important for many cognitive processes, including problem solving. In this article, we discuss working memory capacity ({WMC}), a construct related to the ability to focus attention, and its differential effects on analytic and creative problem solving. One of the main ways in which {WMC} benefits analytic problem solving seems to be that it helps problem solvers to control their attention, resist distraction, and narrow their search through a problem space. Conversely, several lines of recent evidence have shown that too much focus can actually harm performance on creative problem-solving tasks.},
}

@article{kutter_distinct_2023,
	author = {Kutter, Esther F. and Dehnen, Gert and Borger, Valeri and Surges, Rainer and Mormann, Florian and Nieder, Andreas},
	title = {Distinct neuronal representation of small and large numbers in the human medial temporal lobe},
	journaltitle = {Nature Human Behaviour},
	shortjournal = {Nat Hum Behav},
	volume = {7},
	number = {11},
	pages = {1998--2007},
	date = {2023-10-02},
	issn = {2397-3374},
	doi = {10.1038/s41562-023-01709-3},
	pmid = {37783890},
	url = {https://www.nature.com/articles/s41562-023-01709-3},
	urldate = {2024-03-08},
	langid = {english},
}

@article{niu_review_2021,
	title = {A review on the attention mechanism of deep learning},
	volume = {452},
	issn = {0925-2312},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S092523122100477X},
	doi = {10.1016/j.neucom.2021.03.091},
	pages = {48--62},
	journaltitle = {Neurocomputing},
	shortjournal = {Neurocomputing},
	author = {Niu, Zhaoyang and Zhong, Guoqiang and Yu, Hui},
	urldate = {2024-02-22},
	date = {2021-09},
	langid = {english},
}

@article{zhang_neural_2020,
	author = {Zhang, Biao and Xiong, Deyi and Su, Jinsong},
	title = {Neural Machine Translation with Deep Attention},
	journaltitle = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
	shortjournal = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
	volume = {42},
	number = {1},
	pages = {154--163},
	date = {2020-01-01},
	issn = {0162-8828, 2160-9292, 1939-3539},
	doi = {10.1109/TPAMI.2018.2876404},
	url = {https://ieeexplore.ieee.org/document/8493282/},
	urldate = {2024-02-22},
}

@article{guo_attention_2022,
	author = {Guo, Meng-Hao and Xu, Tian-Xing and Liu, Jiang-Jiang and Liu, Zheng-Ning and Jiang, Peng-Tao and Mu, Tai-Jiang and Zhang, Song-Hai and Martin, Ralph R. and Cheng, Ming-Ming and Hu, Shi-Min},
	title = {Attention mechanisms in computer vision: A survey},
	shorttitle = {Attention mechanisms in computer vision},
	journaltitle = {Computational Visual Media},
	shortjournal = {Comp. Visual Media},
	volume = {8},
	number = {3},
	pages = {331--368},
	date = {2022-09},
	issn = {2096-0433, 2096-0662},
	doi = {10.1007/s41095-022-0271-y},
	url = {https://link.springer.com/10.1007/s41095-022-0271-y},
	urldate = {2024-02-22},
	langid = {english},
	abstract = {Humans can naturally and effectively find salient regions in complex scenes. Motivated by this observation, attention mechanisms were introduced into computer vision with the aim of imitating this aspect of the human visual system. Such an attention mechanism can be regarded as a dynamic weight adjustment process based on features of the input image. Attention mechanisms have achieved great success in many visual tasks, including image classification, object detection, semantic segmentation, video understanding, image generation, 3D vision, multimodal tasks, and self-supervised learning. In this survey, we provide a comprehensive review of various attention mechanisms in computer vision and categorize them according to approach, such as channel attention, spatial attention, temporal attention, and branch attention; a related repository https://github.com/{MenghaoGuo}/Awesome-Vision-Attentions is dedicated to collecting related work. We also suggest future directions for attention mechanism research.},
}

@article{ding_deep_2021,
	title = {Deep connected attention ({DCA}) {ResNet} for robust voice pathology detection and classification},
	volume = {70},
	issn = {1746-8094},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S174680942100570X},
	doi = {10.1016/j.bspc.2021.102973},
	pages = {102973},
	journaltitle = {Biomedical Signal Processing and Control},
	shortjournal = {Biomedical Signal Processing and Control},
	author = {Ding, Huijun and Gu, Zixiong and Dai, Peng and Zhou, Zhou and Wang, Lu and Wu, Xiaoxiao},
	urldate = {2024-02-22},
	date = {2021-09},
	langid = {english},
}

@article{khan_transformers_2022,
	title = {Transformers in Vision: A Survey},
	volume = {54},
	issn = {0360-0300, 1557-7341},
	url = {http://arxiv.org/abs/2101.01169},
	doi = {10.1145/3505244},
	shorttitle = {Transformers in Vision},
	abstract = {Astounding results from Transformer models on natural language tasks have intrigued the vision community to study their application to computer vision problems. Among their salient benefits, Transformers enable modeling long dependencies between input sequence elements and support parallel processing of sequence as compared to recurrent networks e.g., Long short-term memory ({LSTM}). Different from convolutional networks, Transformers require minimal inductive biases for their design and are naturally suited as set-functions. Furthermore, the straightforward design of Transformers allows processing multiple modalities (e.g., images, videos, text and speech) using similar processing blocks and demonstrates excellent scalability to very large capacity networks and huge datasets. These strengths have led to exciting progress on a number of vision tasks using Transformer networks. This survey aims to provide a comprehensive overview of the Transformer models in the computer vision discipline. We start with an introduction to fundamental concepts behind the success of Transformers i.e., self-attention, large-scale pre-training, and bidirectional encoding. We then cover extensive applications of transformers in vision including popular recognition tasks (e.g., image classification, object detection, action recognition, and segmentation), generative modeling, multi-modal tasks (e.g., visual-question answering, visual reasoning, and visual grounding), video processing (e.g., activity recognition, video forecasting), low-level vision (e.g., image super-resolution, image enhancement, and colorization) and 3D analysis (e.g., point cloud classification and segmentation). We compare the respective advantages and limitations of popular techniques both in terms of architectural design and their experimental value. Finally, we provide an analysis on open research directions and possible future works.},
	pages = {1--41},
	number = {10},
	journaltitle = {{ACM} Computing Surveys},
	shortjournal = {{ACM} Comput. Surv.},
	author = {Khan, Salman and Naseer, Muzammal and Hayat, Munawar and Zamir, Syed Waqas and Khan, Fahad Shahbaz and Shah, Mubarak},
	urldate = {2024-02-09},
	date = {2022-01-31},
	eprinttype = {arxiv},
	eprint = {2101.01169},
	eprintclass = {cs},
	keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition},
}

@misc{liu_multi_head_2021,
	title = {Multi-head or Single-head? An Empirical Comparison for Transformer Training},
	url = {http://arxiv.org/abs/2106.09650},
	doi = {10.48550/arXiv.2106.09650},
	shorttitle = {Multi-head or Single-head?},
	abstract = {Multi-head attention plays a crucial role in the recent success of Transformer models, which leads to consistent performance improvements over conventional attention in various applications. The popular belief is that this effectiveness stems from the ability of jointly attending multiple positions. In this paper, we first demonstrate that jointly attending multiple positions is not a unique feature of multi-head attention, as multi-layer single-head attention also attends multiple positions and is more effective. Then, we suggest the main advantage of the multi-head attention is the training stability, since it has less number of layers than the single-head attention, when attending the same number of positions. For example, 24-layer 16-head Transformer ({BERT}-large) and 384-layer single-head Transformer has the same total attention head number and roughly the same model size, while the multi-head one is significantly shallower. Meanwhile, we show that, with recent advances in deep learning, we can successfully stabilize the training of the 384-layer Transformer. As the training difficulty is no longer a bottleneck, substantially deeper single-head Transformer achieves consistent performance improvements without tuning hyper-parameters.},
	number = {{arXiv}:2106.09650},
	publisher = {{arXiv}},
	author = {Liu, Liyuan and Liu, Jialu and Han, Jiawei},
	urldate = {2024-02-23},
	date = {2021-06-17},
	eprinttype = {arxiv},
	eprint = {2106.09650},
	eprintclass = {cs},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
}

@article{de_santana_correia_attention_2022,
	author = {De Santana Correia, Alana and Colombini, Esther Luna},
	title = {Attention, please! A survey of neural attention models in deep learning},
	journaltitle = {Artificial Intelligence Review},
	shortjournal = {Artif Intell Rev},
	volume = {55},
	number = {8},
	pages = {6037--6124},
	date = {2022-12},
	issn = {0269-2821, 1573-7462},
	doi = {10.1007/s10462-022-10148-x},
	url = {https://link.springer.com/10.1007/s10462-022-10148-x},
	urldate = {2024-02-22},
	langid = {english},
}

@article{lai_understanding_2021,
	author = {Lai, Qiuxia and Khan, Salman and Nie, Yongwei and Sun, Hanqiu and Shen, Jianbing and Shao, Ling},
	title = {Understanding More About Human and Machine Attention in Deep Neural Networks},
	journaltitle = {{IEEE} Transactions on Multimedia},
	shortjournal = {{IEEE} Trans. Multimedia},
	volume = {23},
	pages = {2086--2099},
	date = {2021},
	issn = {1520-9210, 1941-0077},
	doi = {10.1109/TMM.2020.3007321},
	url = {https://ieeexplore.ieee.org/document/9133499/},
	urldate = {2024-02-22},
}

% venue holds the place where the workshop met; location is reserved for the publisher's city.
@inproceedings{vig_analyzing_2019,
	venue = {Florence, Italy},
	title = {Analyzing the Structure of Attention in a Transformer Language Model},
	url = {https://www.aclweb.org/anthology/W19-4808},
	doi = {10.18653/v1/W19-4808},
	eventtitle = {2019 {ACL} Workshop {BlackboxNLP}: Analyzing and Interpreting Neural Networks for {NLP}},
	pages = {63--76},
	booktitle = {Proceedings of the 2019 {ACL} Workshop {BlackboxNLP}: Analyzing and Interpreting Neural Networks for {NLP}},
	publisher = {Association for Computational Linguistics},
	author = {Vig, Jesse and Belinkov, Yonatan},
	urldate = {2024-02-23},
	date = {2019},
	langid = {english},
}

@misc{binz_turning_2023,
	title = {Turning large language models into cognitive models},
	url = {http://arxiv.org/abs/2306.03917},
	doi = {10.48550/arXiv.2306.03917},
	abstract = {Large language models are powerful systems that excel at many tasks, ranging from translation to mathematical reasoning. Yet, at the same time, these models often show unhuman-like characteristics. In the present paper, we address this gap and ask whether large language models can be turned into cognitive models. We find that -- after finetuning them on data from psychological experiments -- these models offer accurate representations of human behavior, even outperforming traditional cognitive models in two decision-making domains. In addition, we show that their representations contain the information necessary to model behavior on the level of individual subjects. Finally, we demonstrate that finetuning on multiple tasks enables large language models to predict human behavior in a previously unseen task. Taken together, these results suggest that large, pre-trained models can be adapted to become generalist cognitive models, thereby opening up new research directions that could transform cognitive psychology and the behavioral sciences as a whole.},
	number = {{arXiv}:2306.03917},
	publisher = {{arXiv}},
	author = {Binz, Marcel and Schulz, Eric},
	urldate = {2024-02-23},
	date = {2023-06-06},
	eprinttype = {arxiv},
	eprint = {2306.03917},
	eprintclass = {cs},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning, Computer Science - Artificial Intelligence},
}

@misc{anthropic_claude_2024,
	author = {{Anthropic}},
	title = {The Claude 3 Model Family: Opus, Sonnet, Haiku},
	date = {2024-03},
	url = {https://www-cdn.anthropic.com/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627/Model_Card_Claude_3.pdf},
	urldate = {2024-03-07},
	abstract = {We introduce Claude 3, a new family of large multimodal models – Claude 3 Opus, our most capable offering, Claude 3 Sonnet, which provides a combination of skills and speed, and Claude 3 Haiku, our fastest and least expensive model. All new models have vision capabilities that enable them to process and analyze image data. The Claude 3 family demonstrates strong performance across benchmark evaluations and sets a new standard on measures of reasoning, math, and coding. Claude 3 Opus achieves state-of-the-art results on evaluations like {GPQA} [1], {MMLU} [2], {MMMU} [3] and many more. Claude 3 Haiku performs as well or better than Claude 2 [4] on most pure-text tasks, while Sonnet and Opus significantly outperform it. Additionally, these models exhibit improved fluency in non-English languages, making them more versatile for a global audience. In this report, we provide an in-depth analysis of our evaluations, focusing on core capabilities, safety, societal impacts, and the catastrophic risk assessments we committed to in our Responsible scaling Policy [5].},
}

@misc{kuratov_search_2024,
	title = {In Search of Needles in a 11M Haystack: Recurrent Memory Finds What {LLMs} Miss},
	url = {http://arxiv.org/abs/2402.10790},
	shorttitle = {In Search of Needles in a 11M Haystack},
	abstract = {This paper addresses the challenge of processing long documents using generative transformer models. To evaluate different approaches, we introduce {BABILong}, a new benchmark designed to assess model capabilities in extracting and processing distributed facts within extensive texts. Our evaluation, which includes benchmarks for {GPT}-4 and {RAG}, reveals that common methods are effective only for sequences up to $10^4$ elements. In contrast, fine-tuning {GPT}-2 with recurrent memory augmentations enables it to handle tasks involving up to $11\times 10^6$ elements. This achievement marks a substantial leap, as it is by far the longest input processed by any neural network model to date, demonstrating a significant improvement in the processing capabilities for long sequences.},
	number = {{arXiv}:2402.10790},
	publisher = {{arXiv}},
	author = {Kuratov, Yuri and Bulatov, Aydar and Anokhin, Petr and Sorokin, Dmitry and Sorokin, Artyom and Burtsev, Mikhail},
	urldate = {2024-03-07},
	date = {2024-02-20},
	eprinttype = {arxiv},
	eprint = {2402.10790},
	eprintclass = {cs},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning, Computer Science - Artificial Intelligence},
}

@article{webb_emergent_2023,
	author = {Webb, Taylor and Holyoak, Keith J. and Lu, Hongjing},
	title = {Emergent analogical reasoning in large language models},
	journaltitle = {Nature Human Behaviour},
	shortjournal = {Nat Hum Behav},
	volume = {7},
	number = {9},
	pages = {1526--1541},
	date = {2023-07-31},
	issn = {2397-3374},
	doi = {10.1038/s41562-023-01659-w},
	url = {https://www.nature.com/articles/s41562-023-01659-w},
	urldate = {2024-02-23},
	langid = {english},
}

@article{thaler_anomalies_1988,
	title = {Anomalies: The Ultimatum Game},
	volume = {2},
	issn = {0895-3309},
	url = {https://pubs.aeaweb.org/doi/10.1257/jep.2.4.195},
	doi = {10.1257/jep.2.4.195},
	shorttitle = {Anomalies},
	abstract = {This paper discusses simple ultimatum games, two-stage bargaining ultimatum games, and multistage ultimatum games. Finally, I discuss ultimatums in the market. Any time a monopolist (or monopsonist) sets a price (or wage), it has the quality of an ultimatum.},
	pages = {195--206},
	number = {4},
	journaltitle = {Journal of Economic Perspectives},
	shortjournal = {Journal of Economic Perspectives},
	author = {Thaler, Richard H.},
	urldate = {2024-02-23},
	date = {1988-11-01},
	langid = {english},
}

@article{ferreira_misinterpretations_2001,
	title = {Misinterpretations of Garden-Path Sentences: Implications for Models of Sentence Processing and Reanalysis},
	volume = {30},
	issn = {0090-6905},
	url = {http://link.springer.com/10.1023/A:1005290706460},
	doi = {10.1023/A:1005290706460},
	pages = {3--20},
	number = {1},
	journaltitle = {Journal of Psycholinguistic Research},
	author = {Ferreira, Fernanda and Christianson, Kiel and Hollingworth, Andrew},
	urldate = {2024-02-23},
	date = {2001},
}

@article{kimball_standard_1993,
	title = {Standard Risk Aversion},
	volume = {61},
	issn = {0012-9682},
	url = {https://www.jstor.org/stable/2951719?origin=crossref},
	doi = {10.2307/2951719},
	pages = {589--611},
	number = {3},
	journaltitle = {Econometrica},
	shortjournal = {Econometrica},
	author = {Kimball, Miles S.},
	urldate = {2024-02-23},
	date = {1993-05},
}

@article{blass_milgram_1999,
	title = {The Milgram Paradigm After 35 Years: Some Things We Now Know About Obedience to Authority},
	volume = {29},
	issn = {0021-9029, 1559-1816},
	url = {https://onlinelibrary.wiley.com/doi/10.1111/j.1559-1816.1999.tb00134.x},
	doi = {10.1111/j.1559-1816.1999.tb00134.x},
	shorttitle = {The Milgram Paradigm After 35 Years},
	abstract = {Guided by the belief that we cannot make broad extrapolations from the obedience studies without first firmly establishing what has and has not been found using the paradigm itself, this article draws on 35 years of accumulated research and writings on the obedience paradigm to present a status report on the following salient questions and issues surrounding obedience to authority: (a) How should we construe the nature of authority in the obedience experiment? (b) Do predictions of those unfamiliar with the obedience experiment underestimate the actual obedience rates? (c) Are there gender differences in obedience? and (d) Have obedience rates changed over time?},
	pages = {955--978},
	number = {5},
	journaltitle = {Journal of Applied Social Psychology},
	shortjournal = {J Applied Social Psychol},
	author = {Blass, Thomas},
	urldate = {2024-02-23},
	date = {1999-05},
	langid = {english},
}

% arXiv preprint with no journal; typed as misc like the other arXiv-only records in this file.
@misc{aher_using_2022,
	title = {Using Large Language Models to Simulate Multiple Humans and Replicate Human Subject Studies},
	rights = {Creative Commons Attribution 4.0 International},
	url = {https://arxiv.org/abs/2208.10264},
	doi = {10.48550/arXiv.2208.10264},
	abstract = {We introduce a new type of test, called a Turing Experiment ({TE}), for evaluating to what extent a given language model, such as {GPT} models, can simulate different aspects of human behavior. A {TE} can also reveal consistent distortions in a language model's simulation of a specific human behavior. Unlike the Turing Test, which involves simulating a single arbitrary individual, a {TE} requires simulating a representative sample of participants in human subject research. We carry out {TEs} that attempt to replicate well-established findings from prior studies. We design a methodology for simulating {TEs} and illustrate its use to compare how well different language models are able to reproduce classic economic, psycholinguistic, and social psychology experiments: Ultimatum Game, Garden Path Sentences, Milgram Shock Experiment, and Wisdom of Crowds. In the first three {TEs}, the existing findings were replicated using recent models, while the last {TE} reveals a "hyper-accuracy distortion" present in some language models (including {ChatGPT} and {GPT}-4), which could affect downstream applications in education and the arts.},
	publisher = {{arXiv}},
	version = {5},
	author = {Aher, Gati and Arriaga, Rosa I. and Kalai, Adam Tauman},
	urldate = {2024-02-23},
	date = {2022},
	eprinttype = {arxiv},
	eprint = {2208.10264},
	keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}

@article{winter_more_2023,
	author = {Winter, Bodo and Fischer, Martin H. and Scheepers, Christoph and Myachykov, Andriy},
	title = {More is Better: English Language Statistics are Biased Toward Addition},
	shorttitle = {More is Better},
	journaltitle = {Cognitive Science},
	shortjournal = {Cognitive Science},
	volume = {47},
	number = {4},
	pages = {e13254},
	date = {2023-04},
	issn = {0364-0213, 1551-6709},
	doi = {10.1111/cogs.13254},
	url = {https://onlinelibrary.wiley.com/doi/10.1111/cogs.13254},
	urldate = {2024-02-23},
	langid = {english},
	abstract = {We have evolved to become who we are, at least in part, due to our general drive to create new things and ideas. When seeking to improve our creations, ideas, or situations, we systematically overlook opportunities to perform subtractive changes. For example, when tasked with giving feedback on an academic paper, reviewers will tend to suggest additional explanations and analyses rather than delete existing ones. Here, we show that this addition bias is systematically reflected in English language statistics along several distinct dimensions. First, we show that words associated with an increase in quantity or number (e.g., add, addition, more, most ) are more frequent than words associated with a decrease in quantity or number (e.g., subtract, subtraction, less, least). Second, we show that in binomial expressions, addition‐related words are mentioned first, that is, add and subtract rather than subtract and add. Third, we show that the distributional semantics of verbs of change, such as to improve and to transform, overlap more with the distributional semantics of add/increase than subtract/decrease, which suggests that change verbs are implicitly biased toward addition. Fourth, addition‐related words have more positive connotations than subtraction‐related words. Fifth, we demonstrate that state‐of‐the‐art large language models, such as the Generative Pre‐trained Transformer ({GPT}‐3), are also biased toward addition. We discuss the implications of our results for research on cognitive biases and decision‐making.},
}

@article{adams_people_2021,
	author = {Adams, Gabrielle S. and Converse, Benjamin A. and Hales, Andrew H. and Klotz, Leidy E.},
	title = {People systematically overlook subtractive changes},
	journaltitle = {Nature},
	shortjournal = {Nature},
	volume = {592},
	number = {7853},
	pages = {258--261},
	date = {2021-04-08},
	issn = {0028-0836, 1476-4687},
	doi = {10.1038/s41586-021-03380-y},
	url = {https://www.nature.com/articles/s41586-021-03380-y},
	urldate = {2024-02-23},
	langid = {english},
}

@misc{koo_benchmarking_2023,
	title = {Benchmarking Cognitive Biases in Large Language Models as Evaluators},
	rights = {Creative Commons Attribution 4.0 International},
	url = {https://arxiv.org/abs/2309.17012},
	doi = {10.48550/arXiv.2309.17012},
	abstract = {Large Language Models ({LLMs}) have recently been shown to be effective as automatic evaluators with simple prompting and in-context learning. In this work, we assemble 15 {LLMs} of four different size ranges and evaluate their output responses by preference ranking from the other {LLMs} as evaluators, such as System Star is better than System Square. We then evaluate the quality of ranking outputs introducing the Cognitive Bias Benchmark for {LLMs} as Evaluators ({CoBBLEr}), a benchmark to measure six different cognitive biases in {LLM} evaluation outputs, such as the Egocentric bias where a model prefers to rank its own outputs highly in evaluation. We find that {LLMs} are biased text quality evaluators, exhibiting strong indications on our bias benchmark (average of 40\% of comparisons across all models) within each of their evaluations that question their robustness as evaluators. Furthermore, we examine the correlation between human and machine preferences and calculate the average Rank-Biased Overlap ({RBO}) score to be 49.6\%, indicating that machine preferences are misaligned with humans. According to our findings, {LLMs} may still be unable to be utilized for automatic annotation aligned with human preferences. Our project page is at: https://minnesotanlp.github.io/cobbler.},
	number = {{arXiv}:2309.17012},
	publisher = {{arXiv}},
	version = {1},
	author = {Koo, Ryan and Lee, Minhwa and Raheja, Vipul and Park, Jong Inn and Kim, Zae Myung and Kang, Dongyeop},
	urldate = {2024-02-23},
	date = {2023},
	eprinttype = {arxiv},
	eprint = {2309.17012},
	keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG})},
}

@misc{shaki_cognitive_2023,
	title = {Cognitive Effects in Large Language Models},
	rights = {{arXiv}.org perpetual, non-exclusive license},
	url = {https://arxiv.org/abs/2308.14337},
	doi = {10.48550/arXiv.2308.14337},
	abstract = {Large Language Models ({LLMs}) such as {ChatGPT} have received enormous attention over the past year and are now used by hundreds of millions of people every day. The rapid adoption of this technology naturally raises questions about the possible biases such models might exhibit. In this work, we tested one of these models ({GPT}-3) on a range of cognitive effects, which are systematic patterns that are usually found in human cognitive tasks. We found that {LLMs} are indeed prone to several human cognitive effects. Specifically, we show that the priming, distance, {SNARC}, and size congruity effects were presented with {GPT}-3, while the anchoring effect is absent. We describe our methodology, and specifically the way we converted real-world experiments to text-based experiments. Finally, we speculate on the possible reasons why {GPT}-3 exhibits these effects and discuss whether they are imitated or reinvented.},
	number = {{arXiv}:2308.14337},
	publisher = {{arXiv}},
	version = {1},
	author = {Shaki, Jonathan and Kraus, Sarit and Wooldridge, Michael},
	urldate = {2024-02-23},
	date = {2023},
	eprinttype = {arxiv},
	eprint = {2308.14337},
	keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences},
}

@article{hopkins_artificial_2023,
	author = {Hopkins, Ashley M and Logan, Jessica M and Kichenadasse, Ganessan and Sorich, Michael J},
	title = {Artificial intelligence chatbots will revolutionize how cancer patients access information: {ChatGPT} represents a paradigm-shift},
	shorttitle = {Artificial intelligence chatbots will revolutionize how cancer patients access information},
	journaltitle = {{JNCI} Cancer Spectrum},
	volume = {7},
	number = {2},
	pages = {pkad010},
	date = {2023-03-01},
	issn = {2515-5091},
	doi = {10.1093/jncics/pkad010},
	url = {https://academic.oup.com/jncics/article/doi/10.1093/jncics/pkad010/7049531},
	urldate = {2024-02-11},
	langid = {english},
	abstract = {On November 30, 2022, {OpenAI} enabled public access to {ChatGPT}, a next-generation artificial intelligence with a highly sophisticated ability to write, solve coding issues, and answer questions. This communication draws attention to the prospect that {ChatGPT} and its successors will become important virtual assistants to patients and health-care providers. In our assessments, ranging from answering basic fact-based questions to responding to complex clinical questions, {ChatGPT} demonstrated a remarkable ability to formulate interpretable responses, which appeared to minimize the likelihood of alarm compared with Google’s feature snippet. Arguably, the {ChatGPT} use case presents an urgent need for regulators and health-care professionals to be involved in developing standards for minimum quality and to raise patient awareness of current limitations of emerging artificial intelligence assistants. This commentary aims to raise awareness at the tipping point of a paradigm shift.},
}

@article{hirosawa_diagnostic_2023,
	title = {Diagnostic Accuracy of Differential-Diagnosis Lists Generated by Generative Pretrained Transformer 3 Chatbot for Clinical Vignettes with Common Chief Complaints: A Pilot Study},
	volume = {20},
	issn = {1660-4601},
	url = {https://www.mdpi.com/1660-4601/20/4/3378},
	doi = {10.3390/ijerph20043378},
	shorttitle = {Diagnostic Accuracy of Differential-Diagnosis Lists Generated by Generative Pretrained Transformer 3 Chatbot for Clinical Vignettes with Common Chief Complaints},
	abstract = {The diagnostic accuracy of differential diagnoses generated by artificial intelligence ({AI}) chatbots, including the generative pretrained transformer 3 ({GPT}-3) chatbot ({ChatGPT}-3) is unknown. This study evaluated the accuracy of differential-diagnosis lists generated by {ChatGPT}-3 for clinical vignettes with common chief complaints. General internal medicine physicians created clinical cases, correct diagnoses, and five differential diagnoses for ten common chief complaints. The rate of correct diagnosis by {ChatGPT}-3 within the ten differential-diagnosis lists was 28/30 (93.3\%). The rate of correct diagnosis by physicians was still superior to that by {ChatGPT}-3 within the five differential-diagnosis lists (98.3\% vs. 83.3\%, p = 0.03). The rate of correct diagnosis by physicians was also superior to that by {ChatGPT}-3 in the top diagnosis (93.3\% vs. 53.3\%, p {\textless} 0.001). The rate of consistent differential diagnoses among physicians within the ten differential-diagnosis lists generated by {ChatGPT}-3 was 62/88 (70.5\%). In summary, this study demonstrates the high diagnostic accuracy of differential-diagnosis lists generated by {ChatGPT}-3 for clinical cases with common chief complaints. This suggests that {AI} chatbots such as {ChatGPT}-3 can generate a well-differentiated diagnosis list for common chief complaints. However, the order of these lists can be improved in the future.},
	pages = {3378},
	number = {4},
	journaltitle = {International Journal of Environmental Research and Public Health},
	shortjournal = {{IJERPH}},
	author = {Hirosawa, Takanobu and Harada, Yukinori and Yokose, Masashi and Sakamoto, Tetsu and Kawamura, Ren and Shimizu, Taro},
	urldate = {2024-02-11},
	date = {2023-02-15},
	langid = {english},
}

@article{rao_assessing_2023,
	author = {Rao, Arya and Pang, Michael and Kim, John and Kamineni, Meghana and Lie, Winston and Prasad, Anoop K. and Landman, Adam and Dreyer, Keith J. and Succi, Marc D.},
	title = {Assessing the Utility of {ChatGPT} Throughout the Entire Clinical Workflow},
	journaltitle = {{medRxiv}: The Preprint Server for Health Sciences},
	shortjournal = {{medRxiv}},
	pages = {2023.02.21.23285886},
	date = {2023-02-26},
	doi = {10.1101/2023.02.21.23285886},
	pmid = {36865204},
	pmcid = {PMC9980239},
	abstract = {{IMPORTANCE}: Large language model ({LLM}) artificial intelligence ({AI}) chatbots direct the power of large training datasets towards successive, related tasks, as opposed to single-ask tasks, for which {AI} already achieves impressive performance. The capacity of {LLMs} to assist in the full scope of iterative clinical reasoning via successive prompting, in effect acting as virtual physicians, has not yet been evaluated.
{OBJECTIVE}: To evaluate {ChatGPT}'s capacity for ongoing clinical decision support via its performance on standardized clinical vignettes.
{DESIGN}: We inputted all 36 published clinical vignettes from the Merck Sharpe \& Dohme ({MSD}) Clinical Manual into {ChatGPT} and compared accuracy on differential diagnoses, diagnostic testing, final diagnosis, and management based on patient age, gender, and case acuity.
{SETTING}: {ChatGPT}, a publicly available {LLM}.
{PARTICIPANTS}: Clinical vignettes featured hypothetical patients with a variety of age and gender identities, and a range of Emergency Severity Indices ({ESIs}) based on initial clinical presentation.
{EXPOSURES}: {MSD} Clinical Manual vignettes.
{MAIN} {OUTCOMES} {AND} {MEASURES}: We measured the proportion of correct responses to the questions posed within the clinical vignettes tested.
{RESULTS}: {ChatGPT} achieved 71.7\% (95\% {CI}, 69.3\% to 74.1\%) accuracy overall across all 36 clinical vignettes. The {LLM} demonstrated the highest performance in making a final diagnosis with an accuracy of 76.9\% (95\% {CI}, 67.8\% to 86.1\%), and the lowest performance in generating an initial differential diagnosis with an accuracy of 60.3\% (95\% {CI}, 54.2\% to 66.6\%). Compared to answering questions about general medical knowledge, {ChatGPT} demonstrated inferior performance on differential diagnosis (β=-15.8\%, p{\textless}0.001) and clinical management (β=-7.4\%, p=0.02) type questions.
{CONCLUSIONS} {AND} {RELEVANCE}: {ChatGPT} achieves impressive accuracy in clinical decision making, with particular strengths emerging as it has more clinical information at its disposal.},
}

@article{gilson_how_2023,
	author = {Gilson, Aidan and Safranek, Conrad W. and Huang, Thomas and Socrates, Vimig and Chi, Ling and Taylor, Richard Andrew and Chartash, David},
	title = {How Does {ChatGPT} Perform on the United States Medical Licensing Examination? The Implications of Large Language Models for Medical Education and Knowledge Assessment},
	shorttitle = {How Does {ChatGPT} Perform on the United States Medical Licensing Examination?},
	journaltitle = {{JMIR} medical education},
	shortjournal = {{JMIR} Med Educ},
	volume = {9},
	pages = {e45312},
	date = {2023-02-08},
	issn = {2369-3762},
	doi = {10.2196/45312},
	pmid = {36753318},
	pmcid = {PMC9947764},
	abstract = {{BACKGROUND}: Chat Generative Pre-trained Transformer ({ChatGPT}) is a 175-billion-parameter natural language processing model that can generate conversation-style responses to user input. {OBJECTIVE}: This study aimed to evaluate the performance of {ChatGPT} on questions within the scope of the United States Medical Licensing Examination Step 1 and Step 2 exams, as well as to analyze responses for user interpretability. {METHODS}: We used 2 sets of multiple-choice questions to evaluate {ChatGPT}'s performance, each with questions pertaining to Step 1 and Step 2. The first set was derived from {AMBOSS}, a commonly used question bank for medical students, which also provides statistics on question difficulty and the performance on an exam relative to the user base. The second set was the National Board of Medical Examiners ({NBME}) free 120 questions. {ChatGPT}'s performance was compared to 2 other large language models, {GPT}-3 and {InstructGPT}. The text output of each {ChatGPT} response was evaluated across 3 qualitative metrics: logical justification of the answer selected, presence of information internal to the question, and presence of information external to the question. {RESULTS}: Of the 4 data sets, {AMBOSS}-Step1, {AMBOSS}-Step2, {NBME}-Free-Step1, and {NBME}-Free-Step2, {ChatGPT} achieved accuracies of 44\% (44/100), 42\% (42/100), 64.4\% (56/87), and 57.8\% (59/102), respectively. {ChatGPT} outperformed {InstructGPT} by 8.15\% on average across all data sets, and {GPT}-3 performed similarly to random chance. The model demonstrated a significant decrease in performance as question difficulty increased (P=.01) within the {AMBOSS}-Step1 data set. We found that logical justification for {ChatGPT}'s answer selection was present in 100\% of outputs of the {NBME} data sets. Internal information to the question was present in 96.8\% (183/189) of all questions. 
The presence of information external to the question was 44.5\% and 27\% lower for incorrect answers relative to correct answers on the {NBME}-Free-Step1 (P{\textless}.001) and {NBME}-Free-Step2 (P=.001) data sets, respectively. {CONCLUSIONS}: {ChatGPT} marks a significant improvement in natural language processing models on the tasks of medical question answering. By performing at a greater than 60\% threshold on the {NBME}-Free-Step-1 data set, we show that the model achieves the equivalent of a passing score for a third-year medical student. Additionally, we highlight {ChatGPT}'s capacity to provide logic and informational context across the majority of answers. These facts taken together make a compelling case for the potential applications of {ChatGPT} as an interactive medical education tool to support learning.},
	keywords = {artificial intelligence, chatbot, {ChatGPT}, conversational agent, education technology, generative pre-trained transformer, {GPT}, machine learning, medical education, {MedQA}, natural language processing, {NLP}},
}

@article{brin_comparing_2023,
	author = {Brin, Dana and Sorin, Vera and Vaid, Akhil and Soroush, Ali and Glicksberg, Benjamin S. and Charney, Alexander W. and Nadkarni, Girish and Klang, Eyal},
	title = {Comparing {ChatGPT} and {GPT}-4 performance in {USMLE} soft skill assessments},
	journaltitle = {Scientific Reports},
	shortjournal = {Sci Rep},
	volume = {13},
	number = {1},
	pages = {16492},
	date = {2023-10-01},
	issn = {2045-2322},
	doi = {10.1038/s41598-023-43436-9},
	url = {https://www.nature.com/articles/s41598-023-43436-9},
	urldate = {2024-02-11},
	langid = {english},
	abstract = {The United States Medical Licensing Examination ({USMLE}) has been a subject of performance study for artificial intelligence ({AI}) models. However, their performance on questions involving {USMLE} soft skills remains unexplored. This study aimed to evaluate {ChatGPT} and {GPT}-4 on {USMLE} questions involving communication skills, ethics, empathy, and professionalism. We used 80 {USMLE}-style questions involving soft skills, taken from the {USMLE} website and the {AMBOSS} question bank. A follow-up query was used to assess the models’ consistency. The performance of the {AI} models was compared to that of previous {AMBOSS} users. {GPT}-4 outperformed {ChatGPT}, correctly answering 90\% compared to {ChatGPT}’s 62.5\%. {GPT}-4 showed more confidence, not revising any responses, while {ChatGPT} modified its original answers 82.5\% of the time. The performance of {GPT}-4 was higher than that of {AMBOSS}'s past users. Both {AI} models, notably {GPT}-4, showed capacity for empathy, indicating {AI}'s potential to meet the complex interpersonal, ethical, and professional demands intrinsic to the practice of medicine.},
}

@article{zhang_unexpectedly_2024,
	title = {Unexpectedly low accuracy of {GPT}-4 in identifying common liver diseases from {CT} scan images},
	issn = {1590-8658},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1590865824002111},
	doi = {10.1016/j.dld.2024.01.191},
	journaltitle = {Digestive and Liver Disease},
	shortjournal = {Digestive and Liver Disease},
	author = {Zhang, Yiwen and Wu, Liwei and Wang, Yangang and Sheng, Bin and Tham, Yih Chung and Ji, Hongwei and Chen, Ying and Ren, Linlin and Liu, Hanyun and Xu, Lili},
	urldate = {2024-02-11},
	date = {2024-02},
	langid = {english},
}

@article{ji_survey_2023,
	author = {Ji, Ziwei and Lee, Nayeon and Frieske, Rita and Yu, Tiezheng and Su, Dan and Xu, Yan and Ishii, Etsuko and Bang, Ye Jin and Madotto, Andrea and Fung, Pascale},
	title = {Survey of Hallucination in Natural Language Generation},
	journaltitle = {{ACM} Computing Surveys},
	shortjournal = {{ACM} Comput. Surv.},
	volume = {55},
	number = {12},
	pages = {1--38},
	date = {2023-12-31},
	issn = {0360-0300, 1557-7341},
	doi = {10.1145/3571730},
	url = {https://dl.acm.org/doi/10.1145/3571730},
	urldate = {2024-02-11},
	langid = {english},
	abstract = {Natural Language Generation ({NLG}) has improved exponentially in recent years thanks to the development of sequence-to-sequence deep learning technologies such as Transformer-based language models. This advancement has led to more fluent and coherent {NLG}, leading to improved development in downstream tasks such as abstractive summarization, dialogue generation, and data-to-text generation. However, it is also apparent that deep learning based generation is prone to hallucinate unintended text, which degrades the system performance and fails to meet user expectations in many real-world scenarios. To address this issue, many studies have been presented in measuring and mitigating hallucinated texts, but these have never been reviewed in a comprehensive manner before. In this survey, we thus provide a broad overview of the research progress and challenges in the hallucination problem in {NLG}. The survey is organized into two parts: (1) a general overview of metrics, mitigation methods, and future directions, and (2) an overview of task-specific research progress on hallucinations in the following downstream tasks, namely abstractive summarization, dialogue generation, generative question answering, data-to-text generation, and machine translation. This survey serves to facilitate collaborative efforts among researchers in tackling the challenge of hallucinated texts in {NLG}.},
}

@article{khullar_large_2024,
	title = {Large Language Models in Health Care: Charting a Path Toward Accurate, Explainable, and Secure {AI}},
	issn = {0884-8734, 1525-1497},
	url = {https://link.springer.com/10.1007/s11606-024-08657-2},
	doi = {10.1007/s11606-024-08657-2},
	shorttitle = {Large Language Models in Health Care},
	journaltitle = {Journal of General Internal Medicine},
	shortjournal = {J {GEN} {INTERN} {MED}},
	author = {Khullar, Dhruv and Wang, Xingbo and Wang, Fei},
	urldate = {2024-02-11},
	date = {2024-02-02},
	langid = {english},
}

@article{savcisens_using_2023,
	author = {Savcisens, Germans and Eliassi-Rad, Tina and Hansen, Lars Kai and Mortensen, Laust Hvas and Lilleholt, Lau and Rogers, Anna and Zettler, Ingo and Lehmann, Sune},
	title = {Using sequences of life-events to predict human lives},
	journaltitle = {Nature Computational Science},
	shortjournal = {Nat Comput Sci},
	volume = {4},
	number = {1},
	pages = {43--56},
	date = {2023-12-18},
	issn = {2662-8457},
	doi = {10.1038/s43588-023-00573-5},
	url = {https://www.nature.com/articles/s43588-023-00573-5},
	urldate = {2024-02-11},
	langid = {english},
}

@misc{zhang_opt_2022,
	title = {{OPT}: Open Pre-trained Transformer Language Models},
	url = {http://arxiv.org/abs/2205.01068},
	doi = {10.48550/arXiv.2205.01068},
	shorttitle = {{OPT}},
	abstract = {Large language models, which are often trained for hundreds of thousands of compute days, have shown remarkable capabilities for zero- and few-shot learning. Given their computational cost, these models are difficult to replicate without significant capital. For the few that are available through {APIs}, no access is granted to the full model weights, making them difficult to study. We present Open Pre-trained Transformers ({OPT}), a suite of decoder-only pre-trained transformers ranging from 125M to 175B parameters, which we aim to fully and responsibly share with interested researchers. We show that {OPT}-175B is comparable to {GPT}-3, while requiring only 1/7th the carbon footprint to develop. We are also releasing our logbook detailing the infrastructure challenges we faced, along with code for experimenting with all of the released models.},
	number = {{arXiv}:2205.01068},
	publisher = {{arXiv}},
	author = {Zhang, Susan and Roller, Stephen and Goyal, Naman and Artetxe, Mikel and Chen, Moya and Chen, Shuohui and Dewan, Christopher and Diab, Mona and Li, Xian and Lin, Xi Victoria and Mihaylov, Todor and Ott, Myle and Shleifer, Sam and Shuster, Kurt and Simig, Daniel and Koura, Punit Singh and Sridhar, Anjali and Wang, Tianlu and Zettlemoyer, Luke},
	urldate = {2024-02-24},
	date = {2022-06-21},
	eprinttype = {arxiv},
	eprint = {2205.01068},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
}

@inproceedings{samsi_words_2023,
	venue = {Boston, {MA}, {USA}},
	title = {From Words to Watts: Benchmarking the Energy Costs of Large Language Model Inference},
	isbn = {9798350308600},
	url = {https://ieeexplore.ieee.org/document/10363447/},
	doi = {10.1109/HPEC58863.2023.10363447},
	shorttitle = {From Words to Watts},
	eventtitle = {2023 {IEEE} High Performance Extreme Computing Conference ({HPEC})},
	pages = {1--9},
	booktitle = {2023 {IEEE} High Performance Extreme Computing Conference ({HPEC})},
	publisher = {{IEEE}},
	author = {Samsi, Siddharth and Zhao, Dan and {McDonald}, Joseph and Li, Baolin and Michaleas, Adam and Jones, Michael and Bergeron, William and Kepner, Jeremy and Tiwari, Devesh and Gadepally, Vijay},
	urldate = {2024-02-24},
	date = {2023-09-25},
}

@misc{faiz_llmcarbon_2023,
	title = {{LLMCarbon}: Modeling the end-to-end Carbon Footprint of Large Language Models},
	rights = {Creative Commons Attribution 4.0 International},
	url = {https://arxiv.org/abs/2309.14393},
	doi = {10.48550/arXiv.2309.14393},
	shorttitle = {{LLMCarbon}},
	abstract = {The carbon footprint associated with large language models ({LLMs}) is a significant concern, encompassing emissions from their training, inference, experimentation, and storage processes, including operational and embodied carbon emissions. An essential aspect is accurately estimating the carbon impact of emerging {LLMs} even before their training, which heavily relies on {GPU} usage. Existing studies have reported the carbon footprint of {LLM} training, but only one tool, mlco2, can predict the carbon footprint of new neural networks prior to physical training. However, mlco2 has several serious limitations. It cannot extend its estimation to dense or mixture-of-experts ({MoE}) {LLMs}, disregards critical architectural parameters, focuses solely on {GPUs}, and cannot model embodied carbon footprints. Addressing these gaps, we introduce {LLMCarbon}, an end-to-end carbon footprint projection model designed for both dense and {MoE} {LLMs}. Compared to mlco2, {LLMCarbon} significantly enhances the accuracy of carbon footprint estimations for various {LLMs}. The source code is released at https://github.com/{SotaroKaneda}/{MLCarbon}.},
	number = {{arXiv}:2309.14393},
	publisher = {{arXiv}},
	version = {2},
	author = {Faiz, Ahmad and Kaneda, Sotaro and Wang, Ruhan and Osi, Rita and Sharma, Prateek and Chen, Fan and Jiang, Lei},
	urldate = {2024-02-24},
	date = {2023},
	eprinttype = {arxiv},
	eprint = {2309.14393},
	keywords = {Artificial Intelligence (cs.{AI}), Computation and Language (cs.{CL}), {FOS}: Computer and information sciences, Machine Learning (cs.{LG}), Computers and Society (cs.{CY})},
}

@article{george_environmental_2023,
	title = {The Environmental Impact of {AI}: A Case Study of Water Consumption by Chat {GPT}},
	rights = {Creative Commons Attribution 4.0 International, Open Access},
	url = {https://zenodo.org/record/7855594},
	doi = {10.5281/ZENODO.7855594},
	shorttitle = {The Environmental Impact of {AI}},
	abstract = {As {AI} is becoming more a part of our lives, people are starting to worry about the negative consequences it might have on the environment. One of the major issues is its high water consumption. The water consumption of {AI} models, including Chat {GPT}, is a major concern and must be managed effectively to reduce environmental harm. This document examines the amount of water that is utilized by Chat {GPT} and other {AI} models and investigates the impact that it may have on the environment, as well as possible solutions to control their water usage. The study further considers the plausibility and usefulness of these approaches. The findings imply that although water usage of {AI} systems is significantly lower compared to other industries, it is still a matter of concern. {AI} models can have a significant water footprint, but this can be reduced by taking certain measures such as improving energy efficiency, utilizing renewable energy sources, optimizing algorithms and implementing strategies to conserve water. Despite the potential of these solutions, there are still issues to be addressed, such as the expense associated with implementation, and further research is required for optimum utilization. In conclusion, this document emphasizes the relevance of recognizing the water footprint caused by {AI} models, giving important details regarding potential solutions to minimize their environmental impact.},
	author = {George, A. Shaji and George, A. S. Hovan and Martin, A. S. Gabrio},
	urldate = {2024-02-23},
	date = {2023-04-20},
	publisher = {Zenodo},
	keywords = {Water footprint, Artificial intelligence, Data centers, Energy efficiency, Cooling systems, Sustainability, Environmental impact, Resource management},
}

@misc{openai_gpt_4_2023,
	title = {{GPT}-4 Technical Report},
	url = {http://arxiv.org/abs/2303.08774},
	doi = {10.48550/arXiv.2303.08774},
	abstract = {We report the development of {GPT}-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. While less capable than humans in many real-world scenarios, {GPT}-4 exhibits human-level performance on various professional and academic benchmarks, including passing a simulated bar exam with a score around the top 10\% of test takers. {GPT}-4 is a Transformer-based model pre-trained to predict the next token in a document. The post-training alignment process results in improved performance on measures of factuality and adherence to desired behavior. A core component of this project was developing infrastructure and optimization methods that behave predictably across a wide range of scales. This allowed us to accurately predict some aspects of {GPT}-4's performance based on models trained with no more than 1/1,000th the compute of {GPT}-4.},
	number = {{arXiv}:2303.08774},
	publisher = {{arXiv}},
	author = {{OpenAI} and Achiam, Josh and Adler, Steven and Agarwal, Sandhini and Ahmad, Lama and Akkaya, Ilge and Aleman, Florencia Leoni and Almeida, Diogo and Altenschmidt, Janko and Altman, Sam and Anadkat, Shyamal and Avila, Red and Babuschkin, Igor and Balaji, Suchir and Balcom, Valerie and Baltescu, Paul and Bao, Haiming and Bavarian, Mo and Belgum, Jeff and Bello, Irwan and Berdine, Jake and Bernadett-Shapiro, Gabriel and Berner, Christopher and Bogdonoff, Lenny and Boiko, Oleg and Boyd, Madelaine and Brakman, Anna-Luisa and Brockman, Greg and Brooks, Tim and Brundage, Miles and Button, Kevin and Cai, Trevor and Campbell, Rosie and Cann, Andrew and Carey, Brittany and Carlson, Chelsea and Carmichael, Rory and Chan, Brooke and Chang, Che and Chantzis, Fotis and Chen, Derek and Chen, Sully and Chen, Ruby and Chen, Jason and Chen, Mark and Chess, Ben and Cho, Chester and Chu, Casey and Chung, Hyung Won and Cummings, Dave and Currier, Jeremiah and Dai, Yunxing and Decareaux, Cory and Degry, Thomas and Deutsch, Noah and Deville, Damien and Dhar, Arka and Dohan, David and Dowling, Steve and Dunning, Sheila and Ecoffet, Adrien and Eleti, Atty and Eloundou, Tyna and Farhi, David and Fedus, Liam and Felix, Niko and Fishman, Simón Posada and Forte, Juston and Fulford, Isabella and Gao, Leo and Georges, Elie and Gibson, Christian and Goel, Vik and Gogineni, Tarun and Goh, Gabriel and Gontijo-Lopes, Rapha and Gordon, Jonathan and Grafstein, Morgan and Gray, Scott and Greene, Ryan and Gross, Joshua and Gu, Shixiang Shane and Guo, Yufei and Hallacy, Chris and Han, Jesse and Harris, Jeff and He, Yuchen and Heaton, Mike and Heidecke, Johannes and Hesse, Chris and Hickey, Alan and Hickey, Wade and Hoeschele, Peter and Houghton, Brandon and Hsu, Kenny and Hu, Shengli and Hu, Xin and Huizinga, Joost and Jain, Shantanu and Jain, Shawn and Jang, Joanne and Jiang, Angela and Jiang, Roger and Jin, Haozhun and Jin, Denny and Jomoto, Shino and Jonn, Billie and Jun, Heewoo and 
Kaftan, Tomer and Kaiser, Łukasz and Kamali, Ali and Kanitscheider, Ingmar and Keskar, Nitish Shirish and Khan, Tabarak and Kilpatrick, Logan and Kim, Jong Wook and Kim, Christina and Kim, Yongjik and Kirchner, Hendrik and Kiros, Jamie and Knight, Matt and Kokotajlo, Daniel and Kondraciuk, Łukasz and Kondrich, Andrew and Konstantinidis, Aris and Kosic, Kyle and Krueger, Gretchen and Kuo, Vishal and Lampe, Michael and Lan, Ikai and Lee, Teddy and Leike, Jan and Leung, Jade and Levy, Daniel and Li, Chak Ming and Lim, Rachel and Lin, Molly and Lin, Stephanie and Litwin, Mateusz and Lopez, Theresa and Lowe, Ryan and Lue, Patricia and Makanju, Anna and Malfacini, Kim and Manning, Sam and Markov, Todor and Markovski, Yaniv and Martin, Bianca and Mayer, Katie and Mayne, Andrew and {McGrew}, Bob and {McKinney}, Scott Mayer and {McLeavey}, Christine and {McMillan}, Paul and {McNeil}, Jake and Medina, David and Mehta, Aalok and Menick, Jacob and Metz, Luke and Mishchenko, Andrey and Mishkin, Pamela and Monaco, Vinnie and Morikawa, Evan and Mossing, Daniel and Mu, Tong and Murati, Mira and Murk, Oleg and Mély, David and Nair, Ashvin and Nakano, Reiichiro and Nayak, Rajeev and Neelakantan, Arvind and Ngo, Richard and Noh, Hyeonwoo and Ouyang, Long and O'Keefe, Cullen and Pachocki, Jakub and Paino, Alex and Palermo, Joe and Pantuliano, Ashley and Parascandolo, Giambattista and Parish, Joel and Parparita, Emy and Passos, Alex and Pavlov, Mikhail and Peng, Andrew and Perelman, Adam and Peres, Filipe de Avila Belbute and Petrov, Michael and Pinto, Henrique Ponde de Oliveira and Pokorny, Michael and Pokrass, Michelle and Pong, Vitchyr and Powell, Tolly and Power, Alethea and Power, Boris and Proehl, Elizabeth and Puri, Raul and Radford, Alec and Rae, Jack and Ramesh, Aditya and Raymond, Cameron and Real, Francis and Rimbach, Kendra and Ross, Carl and Rotsted, Bob and Roussez, Henri and Ryder, Nick and Saltarelli, Mario and Sanders, Ted and Santurkar, Shibani and Sastry, Girish 
and Schmidt, Heather and Schnurr, David and Schulman, John and Selsam, Daniel and Sheppard, Kyla and Sherbakov, Toki and Shieh, Jessica and Shoker, Sarah and Shyam, Pranav and Sidor, Szymon and Sigler, Eric and Simens, Maddie and Sitkin, Jordan and Slama, Katarina and Sohl, Ian and Sokolowsky, Benjamin and Song, Yang and Staudacher, Natalie and Such, Felipe Petroski and Summers, Natalie and Sutskever, Ilya and Tang, Jie and Tezak, Nikolas and Thompson, Madeleine and Tillet, Phil and Tootoonchian, Amin and Tseng, Elizabeth and Tuggle, Preston and Turley, Nick and Tworek, Jerry and Uribe, Juan Felipe Cerón and Vallone, Andrea and Vijayvergiya, Arun and Voss, Chelsea and Wainwright, Carroll and Wang, Justin Jay and Wang, Alvin and Wang, Ben and Ward, Jonathan and Wei, Jason and Weinmann, C. J. and Welihinda, Akila and Welinder, Peter and Weng, Jiayi and Weng, Lilian and Wiethoff, Matt and Willner, Dave and Winter, Clemens and Wolrich, Samuel and Wong, Hannah and Workman, Lauren and Wu, Sherwin and Wu, Jeff and Wu, Michael and Xiao, Kai and Xu, Tao and Yoo, Sarah and Yu, Kevin and Yuan, Qiming and Zaremba, Wojciech and Zellers, Rowan and Zhang, Chong and Zhang, Marvin and Zhao, Shengjia and Zheng, Tianhao and Zhuang, Juntang and Zhuk, William and Zoph, Barret},
	urldate = {2024-02-24},
	date = {2023-12-18},
	eprinttype = {arxiv},
	eprint = {2303.08774},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence},
}

@misc{bubeck_sparks_2023,
	title = {Sparks of Artificial General Intelligence: Early experiments with {GPT}-4},
	url = {http://arxiv.org/abs/2303.12712},
	doi = {10.48550/arXiv.2303.12712},
	shorttitle = {Sparks of Artificial General Intelligence},
	abstract = {Artificial intelligence ({AI}) researchers have been developing and refining large language models ({LLMs}) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by {OpenAI}, {GPT}-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of {GPT}-4, when it was still in active development by {OpenAI}. We contend that (this early version of) {GPT}-4 is part of a new cohort of {LLMs} (along with {ChatGPT} and Google's {PaLM} for example) that exhibit more general intelligence than previous {AI} models. We discuss the rising capabilities and implications of these models. We demonstrate that, beyond its mastery of language, {GPT}-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, {GPT}-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as {ChatGPT}. Given the breadth and depth of {GPT}-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence ({AGI}) system. In our exploration of {GPT}-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of {AGI}, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. We conclude with reflections on societal influences of the recent technological leap and future research directions.},
	number = {{arXiv}:2303.12712},
	publisher = {{arXiv}},
	author = {Bubeck, Sébastien and Chandrasekaran, Varun and Eldan, Ronen and Gehrke, Johannes and Horvitz, Eric and Kamar, Ece and Lee, Peter and Lee, Yin Tat and Li, Yuanzhi and Lundberg, Scott and Nori, Harsha and Palangi, Hamid and Ribeiro, Marco Tulio and Zhang, Yi},
	urldate = {2024-02-24},
	date = {2023-04-13},
	eprinttype = {arxiv},
	eprint = {2303.12712},
	eprintclass = {cs},
	keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence},
}

@misc{schaeffer_are_2023,
	title = {Are Emergent Abilities of Large Language Models a Mirage?},
	url = {http://arxiv.org/abs/2304.15004},
	doi = {10.48550/arXiv.2304.15004},
	abstract = {Recent work claims that large language models display emergent abilities, abilities not present in smaller-scale models that are present in larger-scale models. What makes emergent abilities intriguing is two-fold: their sharpness, transitioning seemingly instantaneously from not present to present, and their unpredictability, appearing at seemingly unforeseeable model scales. Here, we present an alternative explanation for emergent abilities: that for a particular task and model family, when analyzing fixed model outputs, emergent abilities appear due to the researcher's choice of metric rather than due to fundamental changes in model behavior with scale. Specifically, nonlinear or discontinuous metrics produce apparent emergent abilities, whereas linear or continuous metrics produce smooth, continuous predictable changes in model performance. We present our alternative explanation in a simple mathematical model, then test it in three complementary ways: we (1) make, test and confirm three predictions on the effect of metric choice using the {InstructGPT}/{GPT}-3 family on tasks with claimed emergent abilities; (2) make, test and confirm two predictions about metric choices in a meta-analysis of emergent abilities on {BIG}-Bench; and (3) show to choose metrics to produce never-before-seen seemingly emergent abilities in multiple vision tasks across diverse deep networks. Via all three analyses, we provide evidence that alleged emergent abilities evaporate with different metrics or with better statistics, and may not be a fundamental property of scaling {AI} models.},
	number = {{arXiv}:2304.15004},
	publisher = {{arXiv}},
	author = {Schaeffer, Rylan and Miranda, Brando and Koyejo, Sanmi},
	urldate = {2024-02-24},
	date = {2023-05-22},
	eprinttype = {arxiv},
	eprint = {2304.15004},
	eprintclass = {cs},
	keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence},
}

@misc{wei_emergent_2022,
	title = {Emergent Abilities of Large Language Models},
	url = {http://arxiv.org/abs/2206.07682},
	doi = {10.48550/arXiv.2206.07682},
	abstract = {Scaling up language models has been shown to predictably improve performance and sample efficiency on a wide range of downstream tasks. This paper instead discusses an unpredictable phenomenon that we refer to as emergent abilities of large language models. We consider an ability to be emergent if it is not present in smaller models but is present in larger models. Thus, emergent abilities cannot be predicted simply by extrapolating the performance of smaller models. The existence of such emergence implies that additional scaling could further expand the range of capabilities of language models.},
	number = {{arXiv}:2206.07682},
	publisher = {{arXiv}},
	author = {Wei, Jason and Tay, Yi and Bommasani, Rishi and Raffel, Colin and Zoph, Barret and Borgeaud, Sebastian and Yogatama, Dani and Bosma, Maarten and Zhou, Denny and Metzler, Donald and Chi, Ed H. and Hashimoto, Tatsunori and Vinyals, Oriol and Liang, Percy and Dean, Jeff and Fedus, William},
	urldate = {2024-02-24},
	date = {2022-10-26},
	eprinttype = {arxiv},
	eprint = {2206.07682},
	eprintclass = {cs},
	keywords = {Computer Science - Computation and Language},
}

@inproceedings{hamid_chatgpt_2023,
	venue = {Dubai, United Arab Emirates},
	title = {{ChatGPT} and the {Chinese} Room Argument: An Eloquent {AI} Conversationalist Lacking True Understanding and Consciousness},
	isbn = {9798350327502},
	url = {https://ieeexplore.ieee.org/document/10184233/},
	doi = {10.1109/ITT59889.2023.10184233},
	shorttitle = {{ChatGPT} and the {Chinese} Room Argument},
	eventtitle = {2023 9th International Conference on Information Technology Trends ({ITT})},
	pages = {238--241},
	booktitle = {2023 9th International Conference on Information Technology Trends ({ITT})},
	publisher = {{IEEE}},
	author = {Hamid, Oussama H.},
	urldate = {2024-02-24},
	date = {2023-05-24},
}

@misc{hanson_strain_2023,
	title = {The strain on scientific publishing},
	url = {http://arxiv.org/abs/2309.15884},
	doi = {10.48550/arXiv.2309.15884},
	abstract = {Scientists are increasingly overwhelmed by the volume of articles being published. Total articles indexed in Scopus and Web of Science have grown exponentially in recent years; in 2022 the article total was 47\% higher than in 2016, which has outpaced the limited growth, if any, in the number of practising scientists. Thus, publication workload per scientist (writing, reviewing, editing) has increased dramatically. We define this problem as the strain on scientific publishing. To analyse this strain, we present five data-driven metrics showing publisher growth, processing times, and citation behaviours. We draw these data from web scrapes, requests for data from publishers, and material that is freely available through publisher websites. Our findings are based on millions of papers produced by leading academic publishers. We find specific groups have disproportionately grown in their articles published per year, contributing to this strain. Some publishers enabled this growth by adopting a strategy of hosting special issues, which publish articles with reduced turnaround times. Given pressures on researchers to publish or perish to be competitive for funding applications, this strain was likely amplified by these offers to publish more articles. We also observed widespread year-over-year inflation of journal impact factors coinciding with this strain, which risks confusing quality signals. Such exponential growth cannot be sustained. The metrics we define here should enable this evolving conversation to reach actionable solutions to address the strain on scientific publishing.},
	number = {{arXiv}:2309.15884},
	publisher = {{arXiv}},
	author = {Hanson, Mark A. and Barreiro, Pablo Gómez and Crosetto, Paolo and Brockington, Dan},
	urldate = {2024-02-25},
	date = {2023-09-27},
	eprinttype = {arxiv},
	eprint = {2309.15884},
	eprintclass = {cs},
	keywords = {Computer Science - Digital Libraries},
}

@article{martin_modelling_2020,
	author       = {Martin, Andrea E. and Baggio, Giosuè},
	title        = {Modelling meaning composition from formalism to mechanism},
	journaltitle = {Philosophical Transactions of the Royal Society B: Biological Sciences},
	shortjournal = {Phil. Trans. R. Soc. B},
	volume       = {375},
	number       = {1791},
	pages        = {20190298},
	date         = {2020-02-03},
	issn         = {0962-8436, 1471-2970},
	doi          = {10.1098/rstb.2019.0298},
	url          = {https://royalsocietypublishing.org/doi/10.1098/rstb.2019.0298},
	urldate      = {2024-02-24},
	langid       = {english},
	abstract     = {Human thought and language have extraordinary expressive power because meaningful parts can be assembled into more complex semantic structures. This partly underlies our ability to compose meanings into endlessly novel configurations, and sets us apart from other species and current computing devices. Crucially, human behaviour, including language use and linguistic data, indicates that composing parts into complex structures does not threaten the existence of constituent parts as independent units in the system: parts and wholes exist simultaneously yet independently from one another in the mind and brain. This independence is evident in human behaviour, but it seems at odds with what is known about the brain's exquisite sensitivity to statistical patterns: everyday language use is productive and expressive precisely because it can go beyond statistical regularities. Formal theories in philosophy and linguistics explain this fact by assuming that language and thought are compositional: systems of representations that separate a variable (or role) from its values (fillers), such that the meaning of a complex expression is a function of the values assigned to the variables. The debate on whether and how compositional systems could be implemented in minds, brains and machines remains vigorous. However, it has not yet resulted in mechanistic models of semantic composition: how, then, are the constituents of thoughts and sentences put and held together? We review and discuss current efforts at understanding this problem, and we chart possible routes for future research. This article is part of the theme issue ‘Towards mechanistic models of meaning composition’.},
}

@article{ji_survey_2022,
	author       = {Ji, Shaoxiong and Pan, Shirui and Cambria, Erik and Marttinen, Pekka and Yu, Philip S.},
	title        = {A Survey on Knowledge Graphs: Representation, Acquisition, and Applications},
	shorttitle   = {A Survey on Knowledge Graphs},
	journaltitle = {{IEEE} Transactions on Neural Networks and Learning Systems},
	shortjournal = {{IEEE} Trans. Neural Netw. Learning Syst.},
	volume       = {33},
	number       = {2},
	pages        = {494--514},
	date         = {2022-02},
	issn         = {2162-237X, 2162-2388},
	doi          = {10.1109/TNNLS.2021.3070843},
	url          = {https://ieeexplore.ieee.org/document/9416312/},
	urldate      = {2024-02-10},
}

@article{phillips_sheavinguniversal_2020,
	author       = {Phillips, Steven},
	title        = {Sheaving—a universal construction for semantic compositionality},
	journaltitle = {Philosophical Transactions of the Royal Society B: Biological Sciences},
	shortjournal = {Phil. Trans. R. Soc. B},
	volume       = {375},
	number       = {1791},
	pages        = {20190303},
	date         = {2020-02-03},
	issn         = {0962-8436, 1471-2970},
	doi          = {10.1098/rstb.2019.0303},
	url          = {https://royalsocietypublishing.org/doi/10.1098/rstb.2019.0303},
	urldate      = {2024-02-24},
	langid       = {english},
	abstract     = {Semantic compositionality—the way that meanings of complex entities obtain from meanings of constituent entities and their structural relations—is supposed to explain certain concomitant cognitive capacities, such as systematicity. Yet, cognitive scientists are divided on mechanisms for compositionality: e.g. a language of thought on one side versus a geometry of thought on the other. Category theory is a field of (meta)mathematics invented to bridge formal divides. We focus on sheaving—a construction at the nexus of algebra and geometry/topology, alluding to an integrative view, to sketch out a category theory perspective on the semantics of compositionality. Sheaving is a universal construction for making inferences from local knowledge, where meaning is grounded by the underlying topological space. Three examples illustrate how topology conveys meaning, in terms of the inclusion relations between the open sets that constitute the space, though the topology is not regarded as the only source of semantic information. In this sense, category (sheaf) theory provides a general framework for semantic compositionality. This article is part of the theme issue ‘Towards mechanistic models of meaning composition’.},
}

@article{kumar_semantic_2021,
	author       = {Kumar, Abhilasha A. and Steyvers, Mark and Balota, David A.},
	title        = {Semantic Memory Search and Retrieval in a Novel Cooperative Word Game: A Comparison of Associative and Distributional Semantic Models},
	shorttitle   = {Semantic Memory Search and Retrieval in a Novel Cooperative Word Game},
	journaltitle = {Cognitive Science},
	shortjournal = {Cognitive Science},
	volume       = {45},
	number       = {10},
	pages        = {e13053},
	date         = {2021-10},
	issn         = {0364-0213, 1551-6709},
	doi          = {10.1111/cogs.13053},
	url          = {https://onlinelibrary.wiley.com/doi/10.1111/cogs.13053},
	urldate      = {2024-02-10},
	langid       = {english},
	abstract     = {Considerable work during the past two decades has focused on modeling the structure of semantic memory, although the performance of these models in complex and unconstrained semantic tasks remains relatively understudied. We introduce a two‐player cooperative word game, Connector (based on the boardgame Codenames), and investigate whether similarity metrics derived from two large databases of human free association norms, the University of South Florida norms and the Small World of Words norms, and two distributional semantic models based on large language corpora (word2vec and {GloVe}) predict performance in this game. Participant dyads were presented with 20‐item word boards with word pairs of varying relatedness. The speaker received a word pair from the board (e.g., exam-algebra) and generated a one-word semantic clue (e.g., math), which was used by the guesser to identify the word pair on the board across three attempts. Response times to generate the clue, as well as accuracy and latencies for the guessed word pair, were strongly predicted by the cosine similarity between word pairs and clues in random walk‐based associative models, and to a lesser degree by the distributional models, suggesting that conceptual representations activated during free association were better able to capture search and retrieval processes in the game. Further, the speaker adjusted subsequent clues based on the first attempt by the guesser, who in turn benefited from the adjustment in clues, suggesting a cooperative influence in the game that was effectively captured by both associative and distributional models. These results indicate that both associative and distributional models can capture relatively unconstrained search processes in a cooperative game setting, and Connector is particularly suited to examine communication and semantic search processes.},
}

@article{abbott_random_2015,
	author       = {Abbott, Joshua T. and Austerweil, Joseph L. and Griffiths, Thomas L.},
	title        = {Random walks on semantic networks can resemble optimal foraging},
	journaltitle = {Psychological Review},
	shortjournal = {Psychol Rev},
	volume       = {122},
	number       = {3},
	pages        = {558--569},
	date         = {2015-07},
	issn         = {1939-1471},
	doi          = {10.1037/a0038693},
	pmid         = {25642588},
	abstract     = {When people are asked to retrieve members of a category from memory, clusters of semantically related items tend to be retrieved together. A recent article by Hills, Jones, and Todd (2012) argued that this pattern reflects a process similar to optimal strategies for foraging for food in patchy spatial environments, with an individual making a strategic decision to switch away from a cluster of related information as it becomes depleted. We demonstrate that similar behavioral phenomena also emerge from a random walk on a semantic network derived from human word-association data. Random walks provide an alternative account of how people search their memories, postulating an undirected rather than a strategic search process. We show that results resembling optimal foraging are produced by random walks when related items are close together in the semantic network. These findings are reminiscent of arguments from the debate on mental imagery, showing how different processes can produce similar results when operating on different representations.},
	keywords     = {Humans, Female, Male, Animals, Mental Recall, Appetitive Behavior, Models, Psychological, Psychological Theory},
}

@article{zemla_estimating_2018,
	author       = {Zemla, Jeffrey C. and Austerweil, Joseph L.},
	title        = {Estimating Semantic Networks of Groups and Individuals from Fluency Data},
	journaltitle = {Computational Brain \& Behavior},
	shortjournal = {Comput Brain Behav},
	volume       = {1},
	number       = {1},
	pages        = {36--58},
	date         = {2018-03},
	issn         = {2522-0861, 2522-087X},
	doi          = {10.1007/s42113-018-0003-7},
	url          = {http://link.springer.com/10.1007/s42113-018-0003-7},
	urldate      = {2024-02-10},
	langid       = {english},
}

@article{siew_spreadr_2019,
	title = {{spreadr}: An {R} package to simulate spreading activation in a network},
	volume = {51},
	issn = {1554-3528},
	url = {http://link.springer.com/10.3758/s13428-018-1186-5},
	doi = {10.3758/s13428-018-1186-5},
	shorttitle = {{spreadr}},
	pages = {910--929},
	number = {2},
	journaltitle = {Behavior Research Methods},
	shortjournal = {Behav Res},
	author = {Siew, Cynthia S. Q.},
	urldate = {2024-02-10},
	date = {2019-04},
	langid = {english},
}

@misc{park_generative_2023,
	title = {Generative Agents: Interactive Simulacra of Human Behavior},
	url = {http://arxiv.org/abs/2304.03442},
	doi = {10.48550/arXiv.2304.03442},
	shorttitle = {Generative Agents},
	abstract = {Believable proxies of human behavior can empower interactive applications ranging from immersive environments to rehearsal spaces for interpersonal communication to prototyping tools. In this paper, we introduce generative agents--computational software agents that simulate believable human behavior. Generative agents wake up, cook breakfast, and head to work; artists paint, while authors write; they form opinions, notice each other, and initiate conversations; they remember and reflect on days past as they plan the next day. To enable generative agents, we describe an architecture that extends a large language model to store a complete record of the agent's experiences using natural language, synthesize those memories over time into higher-level reflections, and retrieve them dynamically to plan behavior. We instantiate generative agents to populate an interactive sandbox environment inspired by The Sims, where end users can interact with a small town of twenty five agents using natural language. In an evaluation, these generative agents produce believable individual and emergent social behaviors: for example, starting with only a single user-specified notion that one agent wants to throw a Valentine's Day party, the agents autonomously spread invitations to the party over the next two days, make new acquaintances, ask each other out on dates to the party, and coordinate to show up for the party together at the right time. We demonstrate through ablation that the components of our agent architecture--observation, planning, and reflection--each contribute critically to the believability of agent behavior. By fusing large language models with computational, interactive agents, this work introduces architectural and interaction patterns for enabling believable simulations of human behavior.},
	number = {{arXiv}:2304.03442},
	publisher = {{arXiv}},
	author = {Park, Joon Sung and O'Brien, Joseph C. and Cai, Carrie J. and Morris, Meredith Ringel and Liang, Percy and Bernstein, Michael S.},
	urldate = {2024-02-24},
	date = {2023-08-05},
	eprinttype = {arxiv},
	eprint = {2304.03442},
	eprintclass = {cs},
	keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Human-Computer Interaction},
}

@article{kaula_problem_1995,
	title = {Problem solving strategies for open information systems},
	volume = {8},
	issn = {0950-7051},
	url = {https://linkinghub.elsevier.com/retrieve/pii/095070519598901H},
	doi = {10.1016/0950-7051(95)98901-H},
	pages = {235--248},
	number = {5},
	journaltitle = {Knowledge-Based Systems},
	shortjournal = {Knowledge-Based Systems},
	author = {Kaula, Rajeev},
	urldate = {2024-02-24},
	date = {1995-10},
	langid = {english},
}

@misc{dalmaijer_banana_2021,
	title = {Banana for scale: Gauging trends in academic interest by normalising publication rates to common and innocuous keywords},
	url = {http://arxiv.org/abs/2102.06418},
	doi = {10.48550/arXiv.2102.06418},
	shorttitle = {Banana for scale},
	abstract = {Many academics use yearly publication numbers to quantify academic interest for their research topic. While such visualisations are ubiquitous in grant applications, manuscript introductions, and review articles, they fail to account for the rapid growth in scientific publications. As a result, any search term will likely show an increase in supposed "academic interest". One proposed solution is to normalise yearly publication rates by field size, but this is arduous and difficult. Here, we propose an simpler index that normalises keywords of interest by a ubiquitous and innocuous keyword, such as "banana". Alternatively, one could opt for field-specific keywords or hierarchical structures (e.g. {PubMed}'s Medical Subject Headings, {MeSH}) to compute "interest market share". Using this approach, we uncovered plausible trends in academic interest in examples from the medical literature. In neuroimaging, we found that not the supplementary motor area (as was previously claimed), but the prefrontal cortex is the most interesting part of the brain. In cancer research, we found a contemporary preference for cancers with high prevalence and clinical severity, and notable declines in interest for more treatable or likely benign neoplasms. Finally, we found that interest in respiratory viral infections spiked when strains showed potential for pandemic involvement, with {SARS}-{CoV}-2 and the {COVID}-19 pandemic being the most extreme example. In sum, the time is ripe for a quick and easy method to quantify trends in academic interest for anecdotal purposes. We provide such a method, along with software for researchers looking to implement it in their own writing.},
	number = {{arXiv}:2102.06418},
	publisher = {{arXiv}},
	author = {Dalmaijer, Edwin S. and Van Rheede, Joram and Sperr, Edwin V. and Tkotz, Juliane},
	urldate = {2024-02-23},
	date = {2021-02-12},
	eprinttype = {arxiv},
	eprint = {2102.06418},
	eprintclass = {cs},
	keywords = {92-00 (Primary), 92-04 (Secondary), Computer Science - Digital Libraries, J.3},
}

